In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Read the training set into a DataFrame. The path is relative, so the file
# must sit in the notebook's working directory.
TRAIN_CSV_PATH = 'train.csv'
train_data = pd.read_csv(TRAIN_CSV_PATH)

In [2]:
# Encode Sex as an integer flag: male -> 1, female -> 0.
#
# The column is overwritten in place, so this cell must be safe to re-run:
# mapping an already-encoded 0/1 column through the string-keyed dict would
# turn every value into NaN. Only apply the mapping while the column still
# holds its raw, non-numeric labels.
if not pd.api.types.is_numeric_dtype(train_data['Sex']):
    train_data['Sex'] = train_data['Sex'].map({'male': 1, 'female': 0})

In [3]:
# Correlations are only defined for numeric columns, so drop everything else first
numeric_data = train_data.select_dtypes(include='number')

# Pairwise correlation between every remaining column
correlation_matrix = numeric_data.corr()

# Pull out the 'Survived' column of the matrix and rank it from the most
# positive correlation down to the most negative
survived_correlation = correlation_matrix.loc[:, 'Survived'].sort_values(ascending=False)
print(survived_correlation)

Survived       1.000000
Fare           0.257307
Parch          0.081629
PassengerId   -0.005007
SibSp         -0.035322
Age           -0.077221
Pclass        -0.338481
Sex           -0.543351
Name: Survived, dtype: float64


## Decision Tree

In [7]:
# Predictor columns and the target column
feature_columns = ['Fare', 'Pclass', 'Sex']
X = train_data[feature_columns]
y = train_data['Survived']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Keep only numeric-dtype columns. The column list is decided on the training
# split and then applied unchanged to the test split.
numeric_columns = X_train.select_dtypes(include='number').columns
X_train_numeric, X_test_numeric = X_train[numeric_columns], X_test[numeric_columns]

# Fit the mean imputer on the training split only, then apply the same
# fitted imputer to both splits
imputer = SimpleImputer(strategy='mean').fit(X_train_numeric)
X_train_imputed = imputer.transform(X_train_numeric)
X_test_imputed = imputer.transform(X_test_numeric)

# Decision tree hyperparameters. class_weight gives class 1 four times the
# weight of class 0 (80 vs 20).
tree_params = {
    'random_state': 42,
    'max_depth': 5,
    'min_samples_split': 10,
    'max_leaf_nodes': 10,
    'class_weight': {0: 20, 1: 80},
}
tree_clf = DecisionTreeClassifier(**tree_params).fit(X_train_imputed, y_train)

# Score the fitted tree on the held-out split
y_pred_tree = tree_clf.predict(X_test_imputed)
accuracy_tree = accuracy_score(y_true=y_test, y_pred=y_pred_tree)

# Bare last expression so the notebook displays the accuracy
accuracy_tree

0.7821229050279329