In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pickle

In [None]:
# Load the raw passenger table from the CSV sitting next to the notebook.
df = pd.read_csv(filepath_or_buffer='titanic_raw.csv')

In [None]:
# Preview the first five rows (the default count, spelled out).
df.head(n=5)

In [None]:
# Summary statistics for the loaded frame.
df.describe()

In [None]:
# Show the dtype pandas assigned to each column when reading the CSV.
df.dtypes

In [None]:
# Correlation of every numeric column with the target.
# numeric_only=True is required on pandas >= 2.0, where corr() no longer
# silently drops string columns (e.g. 'sex', 'embarked') and raises instead.
df.corr(numeric_only=True)['survived']

In [None]:
# Keep only the modelling columns: four features plus the 'survived' target.
df = df.loc[:, ['pclass', 'age', 'sex', 'survived', 'embarked']]

In [None]:
# Spot-check ten random rows; the fixed seed keeps the displayed sample
# identical across "Restart & Run All" executions.
df.sample(n=10, random_state=42)

In [None]:
# Count missing values per column. The call parentheses matter: without
# them the cell only shows the bound method object, not the counts.
df.isna().sum()

In [None]:
# Frame dimensions as (rows, columns).
df.shape

In [None]:
# Discard rows whose 'embarked' value is missing; all columns are kept.
df = df[df['embarked'].notna()]

In [None]:
# Number of distinct values in each column.
df.nunique()

In [None]:
# List the distinct passenger-class values present in the data.
df.pclass.unique()

In [None]:
# List the distinct 'embarked' values remaining in the data.
df.embarked.unique()

In [None]:
# Separate the feature matrix from the target column.
df_x = df.drop(columns='survived')
df_y = df['survived']

In [None]:
# Hold out 30% of the rows for testing, stratified on the target so both
# partitions keep the same class balance. random_state pins the shuffle so
# the split (and every metric computed from it) is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.3,
    stratify=df_y,
    random_state=42,
)

In [None]:
# Numeric preprocessing: fill missing values with the column median,
# then standardise with StandardScaler.
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('std_scaler', StandardScaler()),
    ]
)

In [None]:
# Route columns to their preprocessing: 'age' and 'pclass' go through the
# numeric pipeline, 'sex' and 'embarked' are one-hot encoded.
# handle_unknown='ignore' encodes a category value not seen during fit as
# all zeros instead of raising, so the saved pipeline stays usable on new
# data (and on CV folds that happen to miss a rare category).
col_transformer = ColumnTransformer([
    ('num', num_pipeline, ['age', 'pclass']),
    ('cat', OneHotEncoder(handle_unknown='ignore'), ['sex', 'embarked']),
])

In [None]:
# Full model: column preprocessing followed by a logistic-regression
# classifier with default settings.
pipeline = Pipeline(
    steps=[
        ('ct', col_transformer),
        ('clf', LogisticRegression()),
    ]
)


In [None]:
# Fit the preprocessing steps and the classifier on the training partition.
pipeline.fit(X=x_train, y=y_train)

In [None]:
# 5-fold cross-validated accuracy, computed on the training partition only.
scores = cross_val_score(
    estimator=pipeline,
    X=x_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
)

In [None]:
# Average of the per-fold cross-validation scores.
scores.mean()

In [None]:
# Persist the fitted pipeline to disk. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way through.
with open('pipeline.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

In [None]:
# Predict class labels for the held-out test partition.
y_test_predicted = pipeline.predict(X=x_test)

In [None]:
# Accuracy of the predictions against the true test labels.
accuracy_score(y_true=y_test, y_pred=y_test_predicted)

In [None]:
# Confusion matrix on the test partition, with rows and columns ordered by
# the classifier's own class labels.
cm = confusion_matrix(
    y_true=y_test,
    y_pred=y_test_predicted,
    labels=pipeline.classes_,
)
cm

In [None]:
# Wrap the matrix in a display object, labelled with the classifier's classes.
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=pipeline.classes_,
)


In [None]:
# Draw the confusion matrix with the display object's default settings.
disp.plot()
# Render the figure; also keeps the return value of plot() out of the cell output.
plt.show()


In [None]:
# Reload the persisted pipeline from disk. The context manager closes the
# file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load 'pipeline.pkl' when it comes from a trusted source (here, the file
# written earlier in this notebook).
with open('pipeline.pkl', 'rb') as model_file:
    pipeline = pickle.load(model_file)