In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

# Read the Kaggle Titanic training split and test split (in that order) into
# two DataFrames that are kept unmodified for the rest of the notebook.
train_original, test_original = (
    pd.read_csv(csv_path)
    for csv_path in ("/kaggle/input/titanic/train.csv", "/kaggle/input/titanic/test.csv")
)

In [None]:
# Summary statistics of the raw training frame, as a first sanity check of the loaded data.
train_original.describe()

In [None]:
# Display the first rows of the raw training frame to inspect its columns and values.
train_original.head()

In [None]:
# Exploratory analysis: bar height is the aggregated Survived value per Sex category.

sns.barplot(data=train_original, x='Sex', y='Survived')

In [None]:
# Same view as the Sex plot, grouped by passenger class instead.
sns.barplot(data=train_original, x='Pclass', y='Survived')

In [None]:
# Correlation heatmap of the raw training data.
# The raw frame still holds text columns (e.g. Name); DataFrame.corr() raises on
# those in pandas >= 2.0, so restrict the frame to numeric columns first. Older
# pandas silently dropped the non-numeric columns, so the plot is unchanged there.
sns.heatmap(
    train_original.select_dtypes(include='number').corr(),
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
)

In [None]:
# Distribution plot of the element-wise sum of the Fare and Survived columns.
# NOTE(review): adding Survived to Fare looks unintended (perhaps a per-class
# comparison of Fare was meant) — confirm the intent before relying on this plot.
# NOTE(review): seaborn has deprecated distplot in favour of histplot/displot;
# migrate once the seaborn version used by this notebook is confirmed.
fare_plus_survived = train_original['Fare'] + train_original['Survived']
sns.distplot(fare_plus_survived)

In [None]:
def getTreatment(name):
    """Map the honorific found in a passenger name to an integer code.

    The check is a case-insensitive substring search, tested in this order:

    * ``"mr."``   -> 1
    * ``"mrs."``  -> 2
    * ``"miss."`` -> 3
    * anything else -> 0

    Args:
        name: Passenger name string, e.g. ``"Braund, Mr. Owen Harris"``.

    Returns:
        int: The title code described above.
    """
    lowered = name.lower()  # lower-case once instead of once per branch
    if "mr." in lowered:
        return 1
    if "mrs." in lowered:
        return 2
    if "miss." in lowered:
        return 3
    # Explicit "other title" bucket. Previously the function fell off the end
    # and returned None, which turned into NaN in the Treatment column.
    return 0

def copyData(dt):
    """Return a copy of ``dt`` with engineered / numerically encoded columns.

    The input frame is not modified. On the copy:

    * ``Treatment`` - title code computed by ``getTreatment`` from ``Name``.
    * ``Sex``       - integer code from a fixed mapping (male=0, female=1).
    * ``Embarked``  - integer code from a fixed mapping (S=0, C=1, Q=2).
    * ``Family``    - ``SibSp + Parch``.

    Missing or unrecognised ``Sex`` / ``Embarked`` values are encoded as -1,
    the same sentinel ``pd.factorize`` used for missing values.

    Args:
        dt: DataFrame with at least the columns Name, Sex, Embarked, SibSp, Parch.

    Returns:
        pandas.DataFrame: the transformed copy.
    """
    # pd.factorize numbers categories by order of first appearance in the frame
    # it is given, so encoding the train and test frames separately could assign
    # different integers to the same category. Fixed tables keep them aligned.
    # NOTE(review): the code values are chosen to match the order of first
    # appearance in the Kaggle train.csv — confirm against the data.
    sex_codes = {'male': 0, 'female': 1}
    embarked_codes = {'S': 0, 'C': 1, 'Q': 2}

    dt_copy = dt.copy()
    dt_copy['Treatment'] = [getTreatment(x) for x in dt_copy["Name"]]
    dt_copy['Sex'] = dt_copy['Sex'].map(sex_codes).fillna(-1).astype(int)
    dt_copy['Embarked'] = dt_copy['Embarked'].map(embarked_codes).fillna(-1).astype(int)
    dt_copy['Family'] = dt_copy['SibSp'] + dt_copy['Parch']
    return dt_copy

# Engineered copy of the training data used for modelling.
train = copyData(train_original)

# copyData leaves string columns such as Name in place; DataFrame.corr() raises
# on those in pandas >= 2.0, so only the numeric columns are correlated. Older
# pandas dropped non-numeric columns implicitly, so the plot is unchanged there.
sns.heatmap(
    train.select_dtypes(include='number').corr(),
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
)

In [None]:
# Model inputs: every column of the engineered training frame, in its original
# order, except the target (Survived) and the columns listed below.
features = train.columns[
    ~train.columns.isin(['Survived', 'Cabin', 'Ticket', 'PassengerId', 'Name', 'SibSp', 'Sex'])
].tolist()

# Apply the same feature engineering to the test split.
test = copyData(test_original)

# Fit the imputer on the training features only, then fill missing values in
# both splits with what was learned from training.
my_imputer = SimpleImputer()
my_imputer.fit(train[features])
imputed_train = my_imputer.transform(train[features])
imputed_test = my_imputer.transform(test[features])

In [None]:
# Random forest on the imputed training features.
# random_state pins the bootstrap sampling and feature sub-sampling so that a
# Restart & Run All reproduces the same model and the same submission file.
clf = RandomForestClassifier(
    n_jobs=2,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=250,
    min_impurity_decrease=0.01,
    random_state=42,
)
clf.fit(imputed_train, train['Survived'])

In [None]:
# Predict the Survived label for every imputed test row.
result = clf.predict(imputed_test)

# Submission table: PassengerId from the untouched test frame next to its
# prediction, written without the index column.
output = test_original[['PassengerId']].assign(Survived=result)
output.to_csv('submission.csv', index=False)