In [109]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

import jovian

In [110]:
# Location of the Titanic training CSV. The absolute path below is the original
# machine-specific default; set the TITANIC_TRAIN_CSV environment variable to
# point at another copy instead of editing the notebook.
TRAIN_CSV_PATH = os.environ.get(
    'TITANIC_TRAIN_CSV',
    '/home/arunachal/Programming/scripts/Titanic/train.csv',
)

# Load the full labelled training set into a DataFrame.
df = pd.read_csv(TRAIN_CSV_PATH)

In [111]:
# Separate the label column from the feature columns.
TARGET = 'Survived'

y = df[TARGET]
X = df.drop(columns=[TARGET])

In [112]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
splits = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)
X_train_full, X_test_full, y_train, y_test = splits

In [113]:
# Feature selection is decided on the training split only, then applied to both
# splits so they end up with the same columns in the same order.
MAX_CARDINALITY = 10                  # object columns must have fewer distinct values than this
NUMERIC_DTYPES = ['int64', 'float64']

# Object-typed columns with a small number of distinct values (per nunique()).
cat_cols_low_card = [
    col
    for col in X_train_full.columns
    if X_train_full[col].dtype == 'object'
    and X_train_full[col].nunique() < MAX_CARDINALITY
]

# Columns stored as 64-bit integers or floats.
num_cols = [
    col
    for col in X_train_full.columns
    if X_train_full[col].dtype in NUMERIC_DTYPES
]

cols_to_use = cat_cols_low_card + num_cols

# Copy the subsets so later changes cannot touch the full frames.
X_train, X_test = (
    frame[cols_to_use].copy()
    for frame in (X_train_full, X_test_full)
)

In [114]:
# Numeric columns: fill missing values with a constant. No fill_value is given,
# so scikit-learn's default constant applies (0 for numeric data according to
# the SimpleImputer docs -- confirm for the installed version).
num_transformer = SimpleImputer(strategy='constant')

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. Categories not seen during fit are ignored at transform
# time rather than raising.
cat_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('one_hot', OneHotEncoder(handle_unknown='ignore')),
]
cat_transformer = Pipeline(steps=cat_steps)

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, num_cols),
        ('cat', cat_transformer, cat_cols_low_card),
    ]
)

In [115]:
# Random forest hyperparameters; random_state is pinned so repeated runs build
# the same forest.
rf_params = {
    'n_estimators': 500,
    'random_state': 0,
}
model = RandomForestClassifier(**rf_params)

In [116]:
# Bundle preprocessing and the classifier so both are fitted together and the
# same transformations are applied at prediction time.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('model', model),
]
my_pipeline = Pipeline(steps=pipeline_steps)

In [117]:
# Fit on the training split, then predict the held-out split. fit() returns the
# fitted pipeline itself, so the two calls can be chained.
y_pred = my_pipeline.fit(X_train, y_train).predict(X_test)

# Confusion matrix of held-out predictions (scikit-learn convention: rows are
# true labels, columns are predicted labels).
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[103,   7],
       [ 17,  52]])

In [118]:
# Share of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8659217877094972

In [119]:
# Commit the current notebook to Jovian. Per the recorded output of this cell,
# the call reports success and returns the URL of the committed notebook.
jovian.commit()

<IPython.core.display.Javascript object>

[jovian] Committed successfully! https://jovian.ai/kr-arunachal/titanic-pipeline-rfc


'https://jovian.ai/kr-arunachal/titanic-pipeline-rfc'