In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, mean_squared_error, f1_score
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv("titanic.csv")
df=df[["Age","Sex","Pclass","Survived"]]

In [3]:
df.dropna(inplace=True)
df["Age"].astype(int)
df["Sex"]=df["Sex"].map({"male":0,"female":1})

In [4]:
x=df[["Age","Sex","Pclass"]]
y=df["Survived"]

In [5]:
x_train, x_test, y_train, y_test=train_test_split(x,y,random_state=42)

In [6]:
models={
    "Logistic Regression":LogisticRegression(),
    "K-Nearest Neighbours": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "Naive Bayes":GaussianNB()
}

In [8]:
result=[]
new=pd.DataFrame([[20,1,3]],columns=['Age',"Sex","Pclass"])
for name, model in models.items():
    pl=Pipeline([
        ('scaler',StandardScaler()),
        ('classifier',model)
    ])
    pl.fit(x_train,y_train)
    y_pred=pl.predict(x_test)
    prediction=pl.predict(new)[0]
    result.append({
    'Model Name': name,
    'Accuracy': round(accuracy_score(y_test,y_pred),4),
    'f1 score': round(f1_score(y_test,y_pred),4),
    'MSE': round(mean_squared_error(y_test,y_pred),4),
    'Prediction': "Survived" if prediction==1 else 'Not Survived'
    })
print('Prediction using different models for new passenger')
result_df=pd.DataFrame(result).sort_values(by='Accuracy',ascending=False)
print(result_df.reset_index(drop=True))

Prediction using different models for new passenger
             Model Name  Accuracy  f1 score     MSE    Prediction
0   Logistic Regression    0.7709    0.7133  0.2291      Survived
1  K-Nearest Neighbours    0.7709    0.6963  0.2291      Survived
2         Random Forest    0.7709    0.7092  0.2291  Not Survived
3                   SVM    0.7709    0.6917  0.2291      Survived
4         Decision Tree    0.7654    0.6957  0.2346  Not Survived
5           Naive Bayes    0.7654    0.7000  0.2346      Survived


In [9]:
tree_model = models["Decision Tree"]
tree_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', tree_model)
])
tree_pipeline.fit(X_train, y_train)
plt.figure(figsize=(16, 8))
plot_tree(tree_pipeline.named_steps['classifier'],
          feature_names=['age', 'sex', 'pclass'],
          class_names=['Not Survived', 'Survived'],
          filled=True, rounded=True)
plt.title("Decision Tree Visualization")
plt.show()

NameError: name 'X_train' is not defined