In [53]:
### DecisionTree optimal Accuracy

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the Titanic dataset
file_path = "titanic_dataset.csv"
titanic_data = pd.read_csv(file_path)

# Keep only the feature columns used by the model plus the 'Survived' target.
# The explicit .copy() makes this an independent frame, so the column assignments
# below write to it directly and do not raise SettingWithCopyWarning.
selected_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived']
titanic_data = titanic_data[selected_columns].copy()

# Map 'Sex' column to numerical values (any label outside this mapping becomes NaN)
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})

# Handle missing values in 'Age' (median) and 'Embarked' (most frequent value).
# The filled column is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and no longer updates
# the parent DataFrame once pandas Copy-on-Write is enabled.
# NOTE(review): both fill values are computed on the full dataset, i.e. before the
# train/test split performed later in this cell — confirm this is acceptable.
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].median())
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(titanic_data['Embarked'].mode()[0])

# One-hot encode 'Embarked'; drop_first=True drops the first category's indicator column
titanic_data = pd.get_dummies(titanic_data, columns=['Embarked'], drop_first=True)

# Separate features (X) and target variable (y)
X = titanic_data.drop('Survived', axis=1)
y = titanic_data['Survived']

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the Decision Tree Classifier with fixed hyperparameters and train it in one
# step; fit() returns the estimator itself, so `model` is the fitted tree.
# (How these hyperparameter values were chosen is not shown in this cell.)
model = DecisionTreeClassifier(
    max_depth=4,
    min_samples_split=4,
    min_samples_leaf=2,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Report both metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.8208955223880597
Confusion Matrix:
 [[140  17]
 [ 31  80]]


In [54]:
# Display the preprocessed frame built in the previous cell: 'Sex' mapped to 0/1,
# missing 'Age' values filled, and 'Embarked' replaced by its one-hot indicator columns.
titanic_data

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Survived,Embarked_Q,Embarked_S
0,3,0,22.0,1,0,7.2500,0,False,True
1,1,1,38.0,1,0,71.2833,1,False,False
2,3,1,26.0,0,0,7.9250,1,False,True
3,1,1,35.0,1,0,53.1000,1,False,True
4,3,0,35.0,0,0,8.0500,0,False,True
...,...,...,...,...,...,...,...,...,...
886,2,0,27.0,0,0,13.0000,0,False,True
887,1,1,19.0,0,0,30.0000,1,False,True
888,3,1,28.0,1,2,23.4500,0,False,True
889,1,0,26.0,0,0,30.0000,1,False,False
