In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastbook import *
from fastai.imports import *
from fastai.tabular.all import *
import os

In [3]:
path = Path(os.path.expandvars("${HOME}/.fastai/data/titanic"))
path.ls()

(#4) [Path('/home/jmd/.fastai/data/titanic/gender_submission.csv'),Path('/home/jmd/.fastai/data/titanic/titanic.zip'),Path('/home/jmd/.fastai/data/titanic/train.csv'),Path('/home/jmd/.fastai/data/titanic/test.csv')]

In [4]:
df = pd.read_csv(f"{path}/train.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Explore Pandas

In [17]:
df.iloc[:,0] # Show all ids

0        1
1        2
2        3
3        4
4        5
      ... 
886    887
887    888
888    889
889    890
890    891
Name: PassengerId, Length: 891, dtype: int64

In [18]:
df.iloc[0,:]

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                                 22
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 0, dtype: object

In [19]:
df.iloc[0]

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                                 22
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 0, dtype: object

## Data Setup

In [5]:
def create_vars():
    cat_names  = ['PassengerId', 'Pclass', 
                  'Name', 'Sex', 'SibSp','Parch', 
                  'Ticket', 'Cabin', 'Embarked']
    cont_names = ['Age', 'Fare']
    procs = [Categorify, FillMissing, Normalize]
    return cat_names, cont_names, procs

In [6]:
cat_names, cont_names, procs = create_vars()

In [7]:
dls = TabularDataLoaders.from_df(
    df, path, procs=procs, cat_names=cat_names, cont_names=cont_names,
    y_names="Survived", bs=64)

In [8]:
dls.show_batch()

Unnamed: 0,PassengerId,Pclass,Name,Sex,SibSp,Parch,Ticket,Cabin,Embarked,Age_na,Age,Fare,Survived
0,23,3,"McGowan, Miss. Anna ""Annie""",female,0,0,330923,#na#,Q,False,15.0,8.0292,1.0
1,697,3,"Kelly, Mr. James",male,0,0,363592,#na#,S,False,44.0,8.049999,0.0
2,171,1,"Van der hoef, Mr. Wyckoff",male,0,0,111240,B19,S,False,60.999999,33.5,0.0
3,847,3,"Sage, Mr. Douglas Bullen",male,8,2,CA. 2343,#na#,S,True,28.0,69.550003,0.0
4,322,3,"Danoff, Mr. Yoto",male,0,0,349219,#na#,S,False,27.0,7.8958,0.0
5,858,1,"Daly, Mr. Peter Denis",male,0,0,113055,E17,S,False,51.0,26.549999,1.0
6,577,2,"Garside, Miss. Ethel",female,0,0,243880,#na#,S,False,34.0,13.000001,1.0
7,877,3,"Gustafsson, Mr. Alfred Ossian",male,0,0,7534,#na#,S,False,20.0,9.845801,0.0
8,746,1,"Crosby, Capt. Edward Gifford",male,1,1,WE/P 5735,B22,S,False,70.0,71.000001,0.0
9,658,3,"Bourke, Mrs. John (Catherine)",female,1,1,364849,#na#,Q,False,32.0,15.5,0.0


## Model Setup

In [9]:
learn = tabular_learner(dls)

In [10]:
learn.fit(5, lr=1e-3)

epoch,train_loss,valid_loss,time
0,0.227174,0.312518,00:00
1,0.136805,0.276801,00:00
2,0.100897,0.249981,00:00
3,0.073775,0.231279,00:00
4,0.055971,0.223039,00:00


## Data Setup with Tabular Pandas

In [11]:
splits = RandomSplitter(valid_pct=0.2)(range_of(df))

In [42]:
cat_names, cont_names, procs = create_vars()
to = TabularPandas(df, procs=procs,
                   cat_names = cat_names,
                   cont_names = cont_names,
                   y_names='Survived',
                   y_block = CategoryBlock,
                   splits=splits)

In [43]:
to.xs.iloc[:2]

Unnamed: 0,PassengerId,Pclass,Name,Sex,SibSp,Parch,Ticket,Cabin,Embarked,Age_na,Age,Fare
498,400,1,19,1,2,3,38,63,3,1,-0.343891,2.308472
809,649,1,153,1,2,1,52,138,3,1,0.273986,0.391927


In [44]:
dls = to.dataloaders(bs=64)

In [45]:
dls.show_batch()

Unnamed: 0,PassengerId,Pclass,Name,Sex,SibSp,Parch,Ticket,Cabin,Embarked,Age_na,Age,Fare,Survived
0,110,3,"Moran, Miss. Bertha",female,1,0,371110,#na#,Q,True,28.0,24.149999,1
1,362,2,"del Carlo, Mr. Sebastiano",male,1,0,SC/PARIS 2167,#na#,C,False,29.0,27.7208,0
2,414,2,"Cunningham, Mr. Alfred Fleming",male,0,0,239853,#na#,S,True,28.0,1e-06,0
3,860,3,"Razi, Mr. Raihed",male,0,0,2629,#na#,C,True,28.0,7.2292,0
4,545,1,"Douglas, Mr. Walter Donald",male,1,0,PC 17761,C86,C,False,49.999999,106.425002,0
5,633,1,"Stahelin-Maeglin, Dr. Max",male,0,0,13214,B50,C,False,32.0,30.5,1
6,100,2,"Kantor, Mr. Sinai",male,1,0,244367,#na#,S,False,34.0,26.0,0
7,269,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,0,1,PC 17582,C125,S,False,57.999999,153.462488,1
8,628,1,"Longley, Miss. Gretchen Fiske",female,0,0,13502,D9,S,False,21.0,77.958298,1
9,48,3,"O'Driscoll, Miss. Bridget",female,0,0,14311,#na#,Q,True,28.0,7.75,1


In [46]:
learn = tabular_learner(dls, metrics=accuracy)

In [47]:
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy,time
0,0.632811,0.669007,0.634831,00:00
