# Decision Tree Model

In [1]:
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Loading and Preprocessing Data

In [2]:
# Loading data
file_path = Path("happiness_top12_df.csv")
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,ladder_score,freedom,healty_life_exp,log_gdp_per_cap,meat_consumption,percept_corrupt,social_support,covid_tests_per_mil,ac_female,generosity,covid_deaths_per_mil,covid_total_tests,covid_active_cases
0,6,0.804,66.601,9.577,82.4,0.756,0.882,62085.0,3.22,-0.071,464.0,13206188.0,771258.0
1,3,0.893,60.633,8.755,5.2,0.774,0.603,16035.0,1.89,0.089,30.0,22149351.0,606387.0
2,5,0.718,64.703,10.189,51.0,0.845,0.903,203623.0,4.31,-0.111,100.0,29716907.0,180931.0
3,4,0.749,56.904,9.403,39.0,0.86,0.86,53044.0,3.46,-0.067,162.0,3149807.0,141264.0
4,6,0.862,68.597,9.859,58.6,0.799,0.831,8189.0,2.18,-0.147,391.0,1056915.0,103325.0


In [3]:
# Define features set
X = df.copy()
X = X.drop("ladder_score", axis=1)
X.head()

Unnamed: 0,freedom,healty_life_exp,log_gdp_per_cap,meat_consumption,percept_corrupt,social_support,covid_tests_per_mil,ac_female,generosity,covid_deaths_per_mil,covid_total_tests,covid_active_cases
0,0.804,66.601,9.577,82.4,0.756,0.882,62085.0,3.22,-0.071,464.0,13206188.0,771258.0
1,0.893,60.633,8.755,5.2,0.774,0.603,16035.0,1.89,0.089,30.0,22149351.0,606387.0
2,0.718,64.703,10.189,51.0,0.845,0.903,203623.0,4.31,-0.111,100.0,29716907.0,180931.0
3,0.749,56.904,9.403,39.0,0.86,0.86,53044.0,3.46,-0.067,162.0,3149807.0,141264.0
4,0.862,68.597,9.859,58.6,0.799,0.831,8189.0,2.18,-0.147,391.0,1056915.0,103325.0


In [4]:
# Define target vector
y = df["ladder_score"].values.reshape(-1, 1)
y[:5]

array([[6],
       [3],
       [5],
       [4],
       [6]], dtype=int64)

In [5]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78, train_size=0.80 )


In [6]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(92, 12)
(24, 12)
(92, 1)
(24, 1)


In [7]:
# Creating StandardScaler instance
scaler = StandardScaler()

In [8]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)

In [9]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Fitting the Decision Tree Model

In [10]:
def decision_tree_model():
    # Creating the decision tree classifier instance
    model = tree.DecisionTreeClassifier()
    # Fitting the model
    model = model.fit(X_train_scaled, y_train)
    # Making predictions using the testing data
    predictions = model.predict(X_test_scaled)
    # Calculating the confusion matrix
    cm = confusion_matrix(y_test, predictions)
    cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1", "Actual 2", "Actual 3", "Actual 4"], 
    columns=["Predicted 0", "Predicted 1", "Predicted 2", "Predicted 3", "Predicted 4"]
    )

    # Calculating the accuracy score
    acc_score = accuracy_score(y_test, predictions)
    # Displaying results
    print("Confusion Matrix")
    display(cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report(y_test, predictions))

In [11]:
decision_tree_model()

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1,Predicted 2,Predicted 3,Predicted 4
Actual 0,0,1,0,0,0
Actual 1,0,0,2,0,0
Actual 2,1,3,1,5,0
Actual 3,0,0,1,3,2
Actual 4,0,0,1,1,3


Accuracy Score : 0.2916666666666667
Classification Report
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.20      0.10      0.13        10
           6       0.33      0.50      0.40         6
           7       0.60      0.60      0.60         5

    accuracy                           0.29        24
   macro avg       0.23      0.24      0.23        24
weighted avg       0.29      0.29      0.28        24

