In [1]:
# Import required libraries
import numpy as np
import pandas as pd

from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Step 1: Define the dataset (from Table 1)
# Column-oriented records: each key holds one column, and position i across
# all four lists describes the same person. First four rows are male, last
# four are female.
data = dict(
    Gender=['male'] * 4 + ['female'] * 4,
    Height=[6.00, 5.92, 5.58, 5.92, 5.00, 5.50, 5.42, 5.75],
    Weight=[180, 190, 170, 165, 100, 150, 130, 150],
    Foot_Size=[12, 11, 12, 10, 6, 8, 7, 9],
)

In [4]:
# Step 2: Construct pandas DataFrame
# Each dict key becomes a column; rows get the default 0..7 integer index.
df = pd.DataFrame.from_dict(data)

In [5]:
# Step 3: Encode the 'Gender' column using LabelEncoder
# (LabelEncoder is imported with the other libraries in the first cell.)
label_encoder = LabelEncoder()
# Fit on the raw string labels kept in `data`, not on df['Gender']: this cell
# overwrites df['Gender'] with integers, so fitting on the column would make a
# second run of the cell learn the classes [0, 1] and inverse_transform would
# then return integers instead of 'male'/'female'.
# LabelEncoder sorts its classes, so female -> 0 and male -> 1.
df['Gender'] = label_encoder.fit_transform(data['Gender'])


In [6]:
# Step 4: Generate features and labels
# Feature matrix: one row per person, columns in the order
# Height, Weight, Foot_Size (as a numpy array).
features = df.loc[:, ['Height', 'Weight', 'Foot_Size']].values
# Target vector: the encoded 'Gender' column (as a numpy array).
labels = df.loc[:, 'Gender'].values

In [7]:
# Step 5: Train the Decision Tree Classifier
# Both Weight and Foot_Size separate the two classes perfectly in this table,
# so several candidate splits tie for best. scikit-learn breaks such ties using
# its random state; pinning random_state makes the fitted tree the same on
# every Restart & Run All.
classifier = DecisionTreeClassifier(random_state=42)
classifier = classifier.fit(features, labels)


In [8]:
# Step 6: Predict the class for a new entry
# One sample, given as a single-row 2D list in the same column order used for
# training: Height, Weight, Foot_Size.
new_entry = [
    [6.00, 180, 12],
]
# predict() returns the encoded class for each row of the input.
prediction = classifier.predict(new_entry)
# Translate the encoded class back into its original string label.
predicted_label = label_encoder.inverse_transform(prediction)

In [9]:
# Step 7: Output the prediction
# Only one sample was classified, so report the first (and only) decoded label.
message = f"Predicted Gender for new entry {new_entry}: {predicted_label[0]}"
print(message)


Predicted Gender for new entry [[6.0, 180, 12]]: male


In [11]:
# Step 8: Evaluate the model using a confusion matrix and accuracy score
# NOTE: no train/test split is performed here. The model is scored on the very
# rows it was trained on, so these numbers describe how well the tree fits the
# training data, not how well it generalizes to unseen samples.
predictions = classifier.predict(features)
accuracy = accuracy_score(labels, predictions)
conf_matrix = confusion_matrix(labels, predictions)  # rows: true class, columns: predicted class

# Header and matrix on consecutive lines, then the accuracy as a percentage.
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print(f"\nAccuracy Score: {accuracy * 100:.2f}%")


Confusion Matrix:
[[4 0]
 [0 4]]

Accuracy Score: 100.00%
