In [9]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score

# 1. Load the datasets
titanic_df = pd.read_csv('titanic.csv')
titanic_test_df = pd.read_csv('titanic_test.csv')
titanic_testlabel_df = pd.read_csv('titanic_testlabel.csv')

# 3. Prepare train_data
train_data_features = titanic_df[['Sex', 'Age', 'Pclass', 'Fare']].copy()
train_data_features['Sex'] = train_data_features['Sex'].map({'male': 0, 'female': 1})
train_data_features['Age'] = train_data_features.groupby('Pclass')['Age'].transform(lambda x: x.fillna(x.mean()))
train_data_features['Fare'] = train_data_features['Fare'].fillna(train_data_features['Fare'].mean())

# 4. Prepare test_data
test_data_features = titanic_test_df[['Sex', 'Age', 'Pclass', 'Fare']].copy()
test_data_features['Sex'] = test_data_features['Sex'].map({'male': 0, 'female': 1})
test_data_features['Age'] = test_data_features.groupby('Pclass')['Age'].transform(lambda x: x.fillna(x.mean()))
test_data_features['Fare'] = test_data_features['Fare'].fillna(test_data_features['Fare'].mean())

# 5. Prepare train_label
train_label = titanic_df['Survived']

# 6. Prepare test_label
test_label = titanic_testlabel_df['Survived']

# 7. Decision Tree Classification
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(train_data_features, train_label)
predictions = dt_classifier.predict(test_data_features)
accuracy = accuracy_score(test_label, predictions)
error_ratio = 1 - accuracy

# Store results for output
results = {
    'Metric': ['Accuracy', 'Error Ratio'],
    'Value': [accuracy, error_ratio]
}
results_df = pd.DataFrame(results)

# 8. Display Hierarchy of Decision Tree
tree_rules = export_text(dt_classifier, feature_names=list(train_data_features.columns))

print("Results:")
print(results_df.to_markdown(index=False))
print("\nDecision Tree Hierarchy:")
print(tree_rules)

Results:
| Metric      |    Value |
|:------------|---------:|
| Accuracy    | 0.767943 |
| Error Ratio | 0.232057 |

Decision Tree Hierarchy:
|--- Sex <= 0.50
|   |--- Age <= 6.50
|   |   |--- Pclass <= 2.50
|   |   |   |--- class: 1
|   |   |--- Pclass >  2.50
|   |   |   |--- Fare <= 20.83
|   |   |   |   |--- class: 1
|   |   |   |--- Fare >  20.83
|   |   |   |   |--- Age <= 2.50
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- Age >  2.50
|   |   |   |   |   |--- Fare <= 31.33
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- Fare >  31.33
|   |   |   |   |   |   |--- class: 1
|   |--- Age >  6.50
|   |   |--- Pclass <= 1.50
|   |   |   |--- Age <= 36.50
|   |   |   |   |--- Fare <= 37.81
|   |   |   |   |   |--- Fare <= 15.64
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- Fare >  15.64
|   |   |   |   |   |   |--- Age <= 31.00
|   |   |   |   |   |   |   |--- Age <= 28.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |  