# Learning from Pandas Official Document

In [2]:
%matplotlib inline
import numpy as np
import numpy.ma as ma
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import line_profiler

In [None]:
titanic = pd.read_csv('data/titanic.csv')
titanic.head()

In [3]:
df = DataFrame({
    'Dimensions':[1,2],
    'Name':['Series', 'DataFrame'],
    'Description':['1D labeled homogeneously-typed array',
                  'General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed column']
})
df

Unnamed: 0,Dimensions,Name,Description
0,1,Series,1D labeled homogeneously-typed array
1,2,DataFrame,"General 2D labeled, size-mutable tabular struc..."


In [4]:
df = pd.DataFrame({
    'Names' : ['Ramesh Kumar', 'Suresh Kumar', 'Rupesh Kumar'],
    'Age' : [22, 35, 58],
    'Sex' : ['male', 'male', 'female']
})
df

Unnamed: 0,Names,Age,Sex
0,Ramesh Kumar,22,male
1,Suresh Kumar,35,male
2,Rupesh Kumar,58,female


In [5]:
ages = pd.Series([22, 35, 58])
ages

0    22
1    35
2    58
dtype: int64

In [6]:
df['Age'].max()

58

In [7]:
ages.max()

58

In [8]:
df.describe()

Unnamed: 0,Age
count,3.0
mean,38.333333
std,18.230012
min,22.0
25%,28.5
50%,35.0
75%,46.5
max,58.0


In [10]:
titanic.head(8)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
5,897,3,"Svensson, Mr. Johan Cervin",male,14.0,0,0,7538,9.225,,S
6,898,3,"Connolly, Miss. Kate",female,30.0,0,0,330972,7.6292,,Q
7,899,2,"Caldwell, Mr. Albert Francis",male,26.0,1,1,248738,29.0,,S


In [11]:
titanic.dtypes

PassengerId      int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [13]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pclass       418 non-null    int64  
 2   Name         418 non-null    object 
 3   Sex          418 non-null    object 
 4   Age          332 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    object 
 8   Fare         417 non-null    float64
 9   Cabin        91 non-null     object 
 10  Embarked     418 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 36.0+ KB


In [15]:
ages = titanic['Age']
ages.head()

0    34.5
1    47.0
2    62.0
3    27.0
4    22.0
Name: Age, dtype: float64

In [16]:
type(titanic['Age']) # pandas.core.series.Series

pandas.core.series.Series

In [17]:
titanic['Age'].shape # (418,)

(418,)

In [18]:
titanic.shape # (418, 11)

(418, 11)

In [21]:
age_sex = titanic[['Age', 'Sex']]
age_sex.head()
type(age_sex) # pandas.core.frame.DataFrame
age_sex.shape # (418, 2)

(418, 2)

In [29]:
above_35 = titanic[titanic['Age'] > 35]
above_35.head()
above_35.shape # (105, 11)

(105, 11)

In [28]:
(titanic['Age']>35).head()

0    False
1     True
2     True
3    False
4    False
Name: Age, dtype: bool

In [32]:
class_23 = titanic[titanic['Pclass'].isin([2,3])]
class_23.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [37]:
class_32 = titanic[(titanic['Pclass'] == 2 ) | (titanic['Pclass'] == 3 )]


False

In [39]:
age_not_na = titanic[titanic['Age'].notna()]
age_not_na.shape # (332, 11)

(332, 11)

In [46]:
adult_names = titanic.loc[titanic['Age']>35, 'Name']
adult_names.head()     

1                      Wilkes, Mrs. James (Ellen Needs)
2                             Myles, Mr. Thomas Francis
11                           Jones, Mr. Charles Cresson
13                                 Howard, Mr. Benjamin
14    Chaffee, Mrs. Herbert Fuller (Carrie Constance...
Name: Name, dtype: object

In [47]:
titanic.iloc[9:25, 2:5]

Unnamed: 0,Name,Sex,Age
9,"Davies, Mr. John Samuel",male,21.0
10,"Ilieff, Mr. Ylio",male,
11,"Jones, Mr. Charles Cresson",male,46.0
12,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23.0
13,"Howard, Mr. Benjamin",male,63.0
14,"Chaffee, Mrs. Herbert Fuller (Carrie Constance...",female,47.0
15,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24.0
16,"Keane, Mr. Daniel",male,35.0
17,"Assaf, Mr. Gerios",male,21.0
18,"Ilmakangas, Miss. Ida Livija",female,27.0


In [55]:
titanic.iloc[:3,2] = 'anonymous'
titanic

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,anonymous,anonymous,34.5,0,0,330911,7.8292,,Q
1,893,3,anonymous,anonymous,47.0,1,0,363272,7.0000,,S
2,894,2,anonymous,anonymous,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [58]:
titanic.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


# NO2 air quality index data - https://openaq.org/#/projects?page=1&parameters=7 

In [59]:
air_quality = pd.read_csv("data/air_quality_no2.csv")
air_quality.head()

Unnamed: 0,locationId,location,city,country,utc,local,parameter,value,unit,latitude,longitude
0,61487,Google Street View Car,London,GB,2019-05-02T22:19:44+00:00,2019-05-02T23:19:44+01:00,co2,519.498,ppm,51.497215,-0.111952
1,61487,Google Street View Car,London,GB,2019-05-02T22:19:43+00:00,2019-05-02T23:19:43+01:00,co2,551.987,ppm,51.497217,-0.11195
2,61487,Google Street View Car,London,GB,2019-05-02T22:19:42+00:00,2019-05-02T23:19:42+01:00,co2,610.856,ppm,51.497218,-0.11195
3,61487,Google Street View Car,London,GB,2019-05-02T22:19:42+00:00,2019-05-02T23:19:42+01:00,pm25,2.016,µg/m³,51.497218,-0.11195
4,61487,Google Street View Car,London,GB,2019-05-02T22:19:42+00:00,2019-05-02T23:19:42+01:00,no2,0.018198,ppm,51.497218,-0.11195
