### Review of Pandas

There are two default data structures in Pandas.
- Series: Homogeneous and Indexed
- DataFrame: Created from multiple Series

df.head(n) -- returns the first n rows of a DataFrame (default n=5)
df.tail(n) -- returns the last n rows of a DataFrame (default n=5)
df[col_name] -- returns the specified column as a Series
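df.shape -- returns a (number of rows, number of columns) tuple
.isin(values) -- returns a boolean mask that is True where an element is in the given iterable; use it to filter rows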
df[df[column_name] condition] -- returns rows where condition is met
df.loc[rows, columns] -- selects a subset of rows and columns by label (or boolean mask)
df.columns -- returns column names 
df.iloc[rows, columns] -- selects a subset of rows and columns by integer position

In [19]:
import pandas as pd

# Load the NO2 air-quality sample data from the pandas documentation repository.
# `parse_dates=True` only attempts to parse the *index*; with no `index_col`
# the index is a plain RangeIndex, so the 'datetime' column would stay as
# strings. Naming the column explicitly gives it a datetime64 dtype while
# keeping it as a regular column.
air_qual = pd.read_csv(
    'https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/air_quality_no2.csv',
    parse_dates=['datetime'],
)
air_qual

Unnamed: 0,datetime,station_antwerp,station_paris,station_london
0,2019-05-07 02:00:00,,,23.0
1,2019-05-07 03:00:00,50.5,25.0,19.0
2,2019-05-07 04:00:00,45.0,27.7,19.0
3,2019-05-07 05:00:00,,50.4,16.0
4,2019-05-07 06:00:00,,61.9,
...,...,...,...,...
1030,2019-06-20 22:00:00,,21.4,
1031,2019-06-20 23:00:00,,24.9,
1032,2019-06-21 00:00:00,,26.5,
1033,2019-06-21 01:00:00,,21.8,


## Creating Derived Columns

In [20]:
# Derived column: scale every London reading by a constant factor.
# NOTE(review): 1.882 is presumably a unit-conversion factor for NO2 (the
# column name suggests mg per cubic metre) — confirm the source and units.
# Rows with a missing London reading stay missing in the derived column.
air_qual['london_mg_per_cubic'] = air_qual['station_london'].mul(1.882)
air_qual

Unnamed: 0,datetime,station_antwerp,station_paris,station_london,london_mg_per_cubic
0,2019-05-07 02:00:00,,,23.0,43.286
1,2019-05-07 03:00:00,50.5,25.0,19.0,35.758
2,2019-05-07 04:00:00,45.0,27.7,19.0,35.758
3,2019-05-07 05:00:00,,50.4,16.0,30.112
4,2019-05-07 06:00:00,,61.9,,
...,...,...,...,...,...
1030,2019-06-20 22:00:00,,21.4,,
1031,2019-06-20 23:00:00,,24.9,,
1032,2019-06-21 00:00:00,,26.5,,
1033,2019-06-21 01:00:00,,21.8,,


In [21]:
# Row-wise average of the three station readings, computed as sum / 3.
# Plain addition propagates NaN, so a row only gets a value when all three
# stations reported a reading for that timestamp.
air_qual['avg_station'] = (
    air_qual['station_paris']
    .add(air_qual['station_antwerp'])
    .add(air_qual['station_london'])
    .div(3)
)
air_qual

Unnamed: 0,datetime,station_antwerp,station_paris,station_london,london_mg_per_cubic,avg_station
0,2019-05-07 02:00:00,,,23.0,43.286,
1,2019-05-07 03:00:00,50.5,25.0,19.0,35.758,31.500000
2,2019-05-07 04:00:00,45.0,27.7,19.0,35.758,30.566667
3,2019-05-07 05:00:00,,50.4,16.0,30.112,
4,2019-05-07 06:00:00,,61.9,,,
...,...,...,...,...,...,...
1030,2019-06-20 22:00:00,,21.4,,,
1031,2019-06-20 23:00:00,,24.9,,,
1032,2019-06-21 00:00:00,,26.5,,,
1033,2019-06-21 01:00:00,,21.8,,,


## Renaming Columns


In [22]:
# Rename a single column via an {old name: new name} mapping; columns not
# listed in the mapping are left untouched.
# The result is reassigned rather than using `inplace=True`: the in-place
# form offers no benefit, prevents method chaining, and is discouraged in
# current pandas style.
air_qual = air_qual.rename(columns={'station_antwerp': 'BT3003'})
air_qual

Unnamed: 0,datetime,BT3003,station_paris,station_london,london_mg_per_cubic,avg_station
0,2019-05-07 02:00:00,,,23.0,43.286,
1,2019-05-07 03:00:00,50.5,25.0,19.0,35.758,31.500000
2,2019-05-07 04:00:00,45.0,27.7,19.0,35.758,30.566667
3,2019-05-07 05:00:00,,50.4,16.0,30.112,
4,2019-05-07 06:00:00,,61.9,,,
...,...,...,...,...,...,...
1030,2019-06-20 22:00:00,,21.4,,,
1031,2019-06-20 23:00:00,,24.9,,,
1032,2019-06-21 00:00:00,,26.5,,,
1033,2019-06-21 01:00:00,,21.8,,,


In [23]:
# Make every column label more readable by swapping underscores for spaces.
air_qual.columns = [label.replace('_', ' ') for label in air_qual.columns]
air_qual

Unnamed: 0,datetime,BT3003,station paris,station london,london mg per cubic,avg station
0,2019-05-07 02:00:00,,,23.0,43.286,
1,2019-05-07 03:00:00,50.5,25.0,19.0,35.758,31.500000
2,2019-05-07 04:00:00,45.0,27.7,19.0,35.758,30.566667
3,2019-05-07 05:00:00,,50.4,16.0,30.112,
4,2019-05-07 06:00:00,,61.9,,,
...,...,...,...,...,...,...
1030,2019-06-20 22:00:00,,21.4,,,
1031,2019-06-20 23:00:00,,24.9,,,
1032,2019-06-21 00:00:00,,26.5,,,
1033,2019-06-21 01:00:00,,21.8,,,


## Calculating Statistics

In [24]:
# Load the Titanic passenger sample data from the pandas documentation repository.
titanic = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv'
)
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [25]:
# Mean passenger age. Missing ages are skipped by default, so the average is
# taken over the non-null entries only.
titanic.loc[:, 'Age'].mean()

29.69911764705882

In [26]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the quartiles listed here are the defaults, spelled out explicitly.
titanic.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292
