# Decision Tree Model

In [1]:
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Loading and Preprocessing Data

In [2]:
# Loading data
file_path = Path("happiness_top12_df.csv")
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,ladder_score,healty_life_exp,covid_tests_per_mil,freedom,social_support,percept_corrupt,log_gdp_per_cap,covid_total_tests,meat_consumption,median_age,generosity,land_area_skm,covid_cases_per_mil
0,2,52.493,2317.0,0.382,0.463,0.924,7.695,90396.0,17.3,27.4,-0.102,652230.0,946.0
1,5,69.0,17564.0,0.828,0.898,0.834,9.962,794544.0,79.7,31.7,-0.182,2780400.0,5044.0
2,5,67.055,57898.0,0.825,0.799,0.629,9.487,171600.0,27.7,35.1,-0.168,29743.0,13435.0
3,7,73.9,181419.0,0.914,0.94,0.442,10.796,4631419.0,108.2,38.7,0.159,7741220.0,779.0
4,7,73.3,104008.0,0.908,0.934,0.481,10.906,937275.0,94.1,44.0,0.042,83871.0,2408.0


In [3]:
# Define features set
X = df.copy()
X = X.drop("ladder_score", axis=1)
X.head()

Unnamed: 0,healty_life_exp,covid_tests_per_mil,freedom,social_support,percept_corrupt,log_gdp_per_cap,covid_total_tests,meat_consumption,median_age,generosity,land_area_skm,covid_cases_per_mil
0,52.493,2317.0,0.382,0.463,0.924,7.695,90396.0,17.3,27.4,-0.102,652230.0,946.0
1,69.0,17564.0,0.828,0.898,0.834,9.962,794544.0,79.7,31.7,-0.182,2780400.0,5044.0
2,67.055,57898.0,0.825,0.799,0.629,9.487,171600.0,27.7,35.1,-0.168,29743.0,13435.0
3,73.9,181419.0,0.914,0.94,0.442,10.796,4631419.0,108.2,38.7,0.159,7741220.0,779.0
4,73.3,104008.0,0.908,0.934,0.481,10.906,937275.0,94.1,44.0,0.042,83871.0,2408.0


In [4]:
# Define target vector
y = df["ladder_score"].values.reshape(-1, 1)
y[:5]

array([[2],
       [5],
       [5],
       [7],
       [7]], dtype=int64)

In [5]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78, train_size=0.80 )


In [6]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(100, 12)
(25, 12)
(100, 1)
(25, 1)


In [7]:
# Creating StandardScaler instance
scaler = StandardScaler()

In [8]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)

In [9]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Fitting the Decision Tree Model

In [10]:
# Creating the decision tree classifier instance
model = tree.DecisionTreeClassifier()

In [11]:
# Fitting the model
model = model.fit(X_train_scaled, y_train)

# Making Predictions Using the Tree Mode

In [12]:
# Making predictions using the testing data
predictions = model.predict(X_test_scaled)

# Model Evaluation

In [13]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1", "Actual 2", "Actual 3", "Actual 4"], 
    columns=["Predicted 0", "Predicted 1", "Predicted 2", "Predicted 3", "Predicted 4"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [14]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1,Predicted 2,Predicted 3,Predicted 4
Actual 0,0,1,1,0,0
Actual 1,2,0,0,1,0
Actual 2,1,2,2,4,0
Actual 3,0,0,2,5,0
Actual 4,0,0,0,1,3


Accuracy Score : 0.4
Classification Report
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         3
           5       0.40      0.22      0.29         9
           6       0.45      0.71      0.56         7
           7       1.00      0.75      0.86         4

    accuracy                           0.40        25
   macro avg       0.37      0.34      0.34        25
weighted avg       0.43      0.40      0.40        25

