In [1]:
!pip install fastai2 -- yes

Collecting fastai2
[?25l  Downloading https://files.pythonhosted.org/packages/cc/50/2f37212be57b7ee3e9c947336f75a66724468b21a3ca68734eaa82e7ebf3/fastai2-0.0.30-py3-none-any.whl (179kB)
[K     |█▉                              | 10kB 17.9MB/s eta 0:00:01[K     |███▋                            | 20kB 23.3MB/s eta 0:00:01[K     |█████▌                          | 30kB 18.7MB/s eta 0:00:01[K     |███████▎                        | 40kB 15.0MB/s eta 0:00:01[K     |█████████▏                      | 51kB 16.3MB/s eta 0:00:01[K     |███████████                     | 61kB 15.0MB/s eta 0:00:01[K     |████████████▉                   | 71kB 12.3MB/s eta 0:00:01[K     |██████████████▋                 | 81kB 13.5MB/s eta 0:00:01[K     |████████████████▍               | 92kB 14.1MB/s eta 0:00:01[K     |██████████████████▎             | 102kB 11.5MB/s eta 0:00:01[K     |████████████████████            | 112kB 11.5MB/s eta 0:00:01[K     |██████████████████████          | 122kB 11.

In [2]:
# Pin fastcore to the release that fastai2 currently supports.
!pip install fastcore==0.1.35

Collecting fastcore==0.1.35
  Downloading https://files.pythonhosted.org/packages/f5/bc/a50e6c70d54042576ba07d4ca7e0d2f4deeb0fc5d621981a0c9536e2f4ee/fastcore-0.1.35-py3-none-any.whl
Installing collected packages: fastcore
  Found existing installation: fastcore 1.3.19
    Uninstalling fastcore-1.3.19:
      Successfully uninstalled fastcore-1.3.19
Successfully installed fastcore-0.1.35


In [3]:
# Sanity check: show which fastcore release the kernel actually imports
# after the pinned install.
import fastcore

installed_fastcore = fastcore.__version__
print(installed_fastcore)

0.1.35


In [4]:
import pandas as pd
import numpy as np

from fastai2.tabular.all import *
import xgboost

from sklearn.metrics import accuracy_score

In [5]:
# Load the training and test tables from CSV files in the working directory.
train_csv, test_csv = 'train-extended.csv', 'test-extended.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [6]:
# Group the training column names by their dtype, to see at a glance which
# columns are numeric and which are text.
column_labels = df_train.columns.to_series()
column_labels.groupby(df_train.dtypes).groups

{int64: ['PassengerId', 'Pclass', 'SibSp', 'Parch'], float64: ['Survived', 'Age', 'Fare', 'WikiId', 'Age_wiki', 'Class'], object: ['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked', 'Name_wiki', 'Hometown', 'Boarded', 'Destination', 'Lifeboat', 'Body']}

In [7]:
# Column roles handed to TabularPandas.
#
# NOTE(review): 'Lifeboat' and 'Body' look like information recorded after the
# outcome (possible target leakage), and 'Name', 'Ticket', 'PassengerId' and
# 'WikiId' look like near-unique identifiers. Confirm they belong in the
# feature set; they may explain the very high validation accuracy.

# Text (object-dtype) columns, treated as categorical features.
cat_names = [
    'Name',
    'Sex',
    'Ticket',
    'Cabin',
    'Embarked',
    'Name_wiki',
    'Hometown',
    'Boarded',
    'Destination',
    'Lifeboat',
    'Body',
]

# Numeric columns, treated as continuous features (the target is not listed).
cont_names = [
    'PassengerId',
    'Pclass',
    'SibSp',
    'Parch',
    'Age',
    'Fare',
    'WikiId',
    'Age_wiki',
    'Class',
]

In [8]:
# Hold out 20% of the training rows for validation. The seed is fixed so that
# the split, and therefore the accuracies reported further down, are the same
# on every Restart & Run All instead of changing from run to run.
SPLIT_SEED = 42
splits = RandomSplitter(valid_pct=0.2, seed=SPLIT_SEED)(range_of(df_train))

# Wrap the training frame with the Categorify / FillMissing / Normalize
# preprocessing steps, using 'Survived' as the target column.
df_tp = TabularPandas(df_train, procs=[Categorify, FillMissing, Normalize],
                      cat_names=cat_names,
                      cont_names=cont_names,
                      y_names='Survived',
                      splits=splits)

In [9]:
# Pull the processed features and flattened targets out of each split so they
# can be passed to a non-fastai estimator.
train_part, valid_part = df_tp.train, df_tp.valid

X_train = train_part.xs
y_train = train_part.ys.values.ravel()

X_val = valid_part.xs
y_val = valid_part.ys.values.ravel()

In [10]:
# Fit an XGBoost classifier, constructed with no explicit hyperparameters,
# on the training split. `fit` returns the estimator itself, so the trailing
# expression displays the fitted model's parameters as the cell output.
xgb_classifier = xgboost.XGBClassifier().fit(X_train, y_train)
xgb_classifier

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [11]:
# Predict on both splits, then report accuracy as a percentage for each.
train_preds = xgb_classifier.predict(X_train)
val_preds = xgb_classifier.predict(X_val)

train_accuracy_pct = accuracy_score(y_train, train_preds) * 100
val_accuracy_pct = accuracy_score(y_val, val_preds) * 100

print('Training accuracy: {:.2f}%'.format(train_accuracy_pct))
print('Validation accuracy: {:.2f}%'.format(val_accuracy_pct))

Training accuracy: 99.58%
Validation accuracy: 99.44%


In [12]:
# Build the test-set table with the same column roles used for training.
#
# NOTE(review): this constructs a fresh TabularPandas on df_test, so Categorify,
# FillMissing and Normalize are set up on the test rows instead of reusing what
# was fitted on df_train. Category codes, fill values and normalization
# statistics may therefore differ from the ones the model was trained on.
# Consider `df_tp.new(df_test)` followed by `.process()`. TODO: confirm against
# the fastai tabular docs, including how it treats columns that have gaps only
# in the test data.
test_procs = [Categorify, FillMissing, Normalize]
test_tp = TabularPandas(
    df_test,
    procs=test_procs,
    cat_names=cat_names,
    cont_names=cont_names,
)

In [13]:
# Processed feature columns for the test rows. test_tp was built without
# `splits`, so presumably every row is exposed through `.train` — TODO confirm.
X_test = test_tp.train.xs

In [14]:
# Preview the first rows of the processed test features.
X_test.head()

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked,Name_wiki,Hometown,Boarded,Destination,Lifeboat,Body,Age_na,WikiId_na,Age_wiki_na,Class_na,Fare_na,PassengerId,Pclass,SibSp,Parch,Age,Fare,WikiId,Age_wiki,Class
0,207,2,153,0,2,194,226,4,103,0,39,1,1,1,1,1,-1.727912,0.873482,-0.49947,-0.400248,0.386231,-0.497413,0.752828,-0.794545,0.87114
1,404,1,222,0,3,389,162,4,2,9,0,1,1,1,1,1,-1.719625,0.873482,0.616992,-0.400248,1.371369,-0.512278,1.729823,1.304755,0.87114
2,270,2,74,0,2,257,65,3,154,0,0,1,1,1,1,1,-1.711337,-0.315819,-0.49947,-0.400248,2.553536,-0.4641,-0.332721,2.504355,-0.311121
3,409,2,148,0,3,393,233,4,13,0,5,1,1,1,1,1,-1.70305,0.873482,-0.49947,-0.400248,-0.204852,-0.482475,1.745709,-0.194745,0.87114
4,179,1,139,0,3,167,208,4,94,7,0,1,1,1,1,1,-1.694763,0.873482,0.616992,0.619896,-0.598908,-0.417491,0.60191,-0.56962,0.87114


In [15]:
# Keep exactly the feature columns the classifier was fitted on, in the same
# order. This drops any column the model was not trained on (here the extra
# `Fare_na` indicator) and fails loudly if a training column is missing.
# Unlike an in-place drop by name, re-running this cell is harmless.
X_test = X_test[X_train.columns]

In [16]:
# Preview the test features again to confirm `Fare_na` is no longer present.
X_test.head()

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked,Name_wiki,Hometown,Boarded,Destination,Lifeboat,Body,Age_na,WikiId_na,Age_wiki_na,Class_na,PassengerId,Pclass,SibSp,Parch,Age,Fare,WikiId,Age_wiki,Class
0,207,2,153,0,2,194,226,4,103,0,39,1,1,1,1,-1.727912,0.873482,-0.49947,-0.400248,0.386231,-0.497413,0.752828,-0.794545,0.87114
1,404,1,222,0,3,389,162,4,2,9,0,1,1,1,1,-1.719625,0.873482,0.616992,-0.400248,1.371369,-0.512278,1.729823,1.304755,0.87114
2,270,2,74,0,2,257,65,3,154,0,0,1,1,1,1,-1.711337,-0.315819,-0.49947,-0.400248,2.553536,-0.4641,-0.332721,2.504355,-0.311121
3,409,2,148,0,3,393,233,4,13,0,5,1,1,1,1,-1.70305,0.873482,-0.49947,-0.400248,-0.204852,-0.482475,1.745709,-0.194745,0.87114
4,179,1,139,0,3,167,208,4,94,7,0,1,1,1,1,-1.694763,0.873482,0.616992,0.619896,-0.598908,-0.417491,0.60191,-0.56962,0.87114


In [17]:
# Predict on the test features and cast the labels to int for the submission
# file (the 'Survived' target is float64 in the training data).
test_preds = xgb_classifier.predict(X_test).astype(int)

In [18]:
# Pair each test passenger id with its predicted label, write the result to a
# CSV without the index column, and preview the first rows.
submission_columns = {'PassengerId': df_test.PassengerId, 'Survived': test_preds}
output = pd.DataFrame(submission_columns)
output.to_csv('my_submission_titanic.csv', index=False)
output.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1
