## Load data

In [16]:
# Location of the Titanic CSV (Stanford CS109 course archive) read by the load cell.
titan_url = (
    'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/'
    'titanic.csv'
)

In [17]:
import numpy as np
import pandas as pd

In [18]:
# Download the CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer=titan_url)
df.head(n=5)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


## Navigation & Selection

In [19]:
# Positional lookup: the row at position 1 (0-based), returned as a Series.
df.iloc[1, :]

Survived                                                                   1
Pclass                                                                     1
Name                       Mrs. John Bradley (Florence Briggs Thayer) Cum...
Sex                                                                   female
Age                                                                       38
Siblings/Spouses Aboard                                                    1
Parents/Children Aboard                                                    0
Fare                                                                 71.2833
Name: 1, dtype: object

In [20]:
# Use Name as the index (the Name column is kept as well), then fetch one
# passenger's row by label.
df2 = df.set_index('Name', drop=False)
df2.loc['Mr. Owen Harris Braund']

Survived                                        0
Pclass                                          3
Name                       Mr. Owen Harris Braund
Sex                                          male
Age                                            22
Siblings/Spouses Aboard                         1
Parents/Children Aboard                         0
Fare                                         7.25
Name: Mr. Owen Harris Braund, dtype: object

In [21]:
# Conditional selection: a boolean mask keeps only the rows whose Name
# matches exactly.
df.loc[df['Name'].eq('Mr. Owen Harris Braund')]

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25


## Statistics

In [22]:
# Per-column count of non-null values among the rows where Sex is 'male'.
df.loc[df['Sex'].eq('male')].count()

Survived                   573
Pclass                     573
Name                       573
Sex                        573
Age                        573
Siblings/Spouses Aboard    573
Parents/Children Aboard    573
Fare                       573
dtype: int64

Survived                   573
Pclass                     573
Name                       573
Sex                        573
Age                        573
Siblings/Spouses Aboard    573
Parents/Children Aboard    573
Fare                       573
dtype: int64

In [26]:
# Distinct values present in the Sex column, in order of first appearance.
df.loc[:, 'Sex'].unique()

array(['male', 'female'], dtype=object)

## Rename & Replace

In [29]:
# Rename a column. The result is a new DataFrame: `inplace` defaults to False
# (and, as a general rule, should stay that way), so `df` itself is unchanged.
df.rename({'Sex': 'Gender'}, axis='columns')

Unnamed: 0,Survived,Pclass,Name,Gender,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.2500
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.9250
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1000
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...,...
882,0,2,Rev. Juozas Montvila,male,27.0,0,0,13.0000
883,1,1,Miss. Margaret Edith Graham,female,19.0,0,0,30.0000
884,0,3,Miss. Catherine Helen Johnston,female,7.0,1,2,23.4500
885,1,1,Mr. Karl Howell Behr,male,26.0,0,0,30.0000


In [30]:
# Replace one exact value in the Name column; returns a new Series and
# leaves `df` untouched.
df['Name'].replace(to_replace='Mr. Owen Harris Braund', value='Mr. X')

0                                                  Mr. X
1      Mrs. John Bradley (Florence Briggs Thayer) Cum...
2                                  Miss. Laina Heikkinen
3            Mrs. Jacques Heath (Lily May Peel) Futrelle
4                                Mr. William Henry Allen
                             ...                        
882                                 Rev. Juozas Montvila
883                          Miss. Margaret Edith Graham
884                       Miss. Catherine Helen Johnston
885                                 Mr. Karl Howell Behr
886                                   Mr. Patrick Dooley
Name: Name, Length: 887, dtype: object

## Deletion

In [31]:
# Drop the Name column; the result is a new DataFrame and `df` keeps the column.
df.drop(columns='Name')

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,male,22.0,1,0,7.2500
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.9250
3,1,1,female,35.0,1,0,53.1000
4,0,3,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...
882,0,2,male,27.0,0,0,13.0000
883,1,1,female,19.0,0,0,30.0000
884,0,3,female,7.0,1,2,23.4500
885,1,1,male,26.0,0,0,30.0000


In [42]:
# "Deleting" rows by filtering: select the rows to keep and bind them to
# another name instead of removing anything from `df`.
# Here: only the passengers who did not survive (Survived == 0).
df2 = df.loc[df['Survived'].eq(0)]
df2

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.2500
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.0500
5,0,3,Mr. James Moran,male,27.0,0,0,8.4583
6,0,1,Mr. Timothy J McCarthy,male,54.0,0,0,51.8625
7,0,3,Master. Gosta Leonard Palsson,male,2.0,3,1,21.0750
...,...,...,...,...,...,...,...,...
880,0,3,Mr. Henry Jr Sutehall,male,25.0,0,0,7.0500
881,0,3,Mrs. William (Margaret Norton) Rice,female,39.0,0,5,29.1250
882,0,2,Rev. Juozas Montvila,male,27.0,0,0,13.0000
884,0,3,Miss. Catherine Helen Johnston,female,7.0,1,2,23.4500


In [45]:
# Drop duplicates: for each distinct Sex value keep only its last occurrence.
df[~df.duplicated(subset=['Sex'], keep='last')]

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
884,0,3,Miss. Catherine Helen Johnston,female,7.0,1,2,23.45
886,0,3,Mr. Patrick Dooley,male,32.0,0,0,7.75


## GroupBy

In [47]:
# Sum every numeric column within each Sex group.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is
# False, so the string Name column would otherwise be included in the sum
# instead of being left out of the result.
df.groupby('Sex').sum(numeric_only=True)

Unnamed: 0_level_0,Survived,Pclass,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
female,233,678,8704.0,218,204,13966.6628
male,109,1367,17437.17,248,136,14688.2449


## Func

In [58]:
def fn(x):
  """Map a Sex value to a Hindi label: 'male' -> 'नर', anything else -> 'महिला'."""
  if x == 'male':
    return 'नर'
  return 'महिला'

# Run fn on every value of the Sex column; yields a new Series of labels.
df['Sex'].map(fn)

0         नर
1      महिला
2      महिला
3      महिला
4         नर
       ...  
882       नर
883    महिला
884    महिला
885       नर
886       नर
Name: Sex, Length: 887, dtype: object

In [59]:
# For each Sex group, count the non-null values in every column by applying
# DataFrame.count to the group's sub-frame.
df.groupby('Sex').apply(pd.DataFrame.count)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,314,314,314,314,314,314,314,314
male,573,573,573,573,573,573,573,573


*By Gaurav Kabra*