In [None]:
import os
import numpy as np
import pandas as pd
import csv
from sklearn import tree
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import uniform, norm
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE


import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the training data from the CSV: columns 2..6 of each data row are the
# five integer features, column 8 is the integer class label.
feature_rows = []
labels = []

# newline="" is what the csv module documents for files handed to csv.reader,
# so that newlines embedded in quoted fields are parsed correctly.
with open("Toy Dataset.csv", "r", newline="") as f:
    d_reader = csv.reader(f, delimiter=",", quotechar="\"")
    next(d_reader, None)  # skip the header row (no-op on an empty file)

    for line in d_reader:
        feature_rows.append([int(value) for value in line[2:7]])
        labels.append(int(line[8]))

# Convert once at the end instead of np.append per row (which re-copies the
# whole array each iteration). float dtype and the explicit (n, 5) shape keep
# the arrays identical to what the incremental version produced, including
# the (0, 5) / (0,) shapes when there are no data rows.
x_train = np.asarray(feature_rows, dtype=float).reshape((len(feature_rows), 5))
y_train = np.asarray(labels, dtype=float)
print(x_train.shape)
print(y_train.shape)

In [None]:
# Fixed seed so the synthetic samples generated below are repeatable.
rand_seed = 3454132

# SMOTE oversampler; "not majority" resamples every class except the
# majority class.
oversampler = SMOTE(
    random_state=rand_seed,
    sampling_strategy="not majority",
)

# Resampled copy of the training set; x_train / y_train are left untouched.
x_smote, y_smote = oversampler.fit_resample(x_train, y_train)

In [None]:
def train_decision_tree(x_train,y_train,hp_search):
    """Fit a hyper-parameter search, print a short report, return the best model.

    Parameters
    ----------
    x_train : array-like
        Feature matrix passed to ``hp_search.fit`` and ``hp_search.predict``.
    y_train : array-like
        Target labels, compared element-wise against the predictions.
    hp_search : fitted-search-like object
        Must provide ``fit``, ``predict`` and, after fitting, ``best_score_``,
        ``best_params_``, ``n_splits_`` and ``best_estimator_`` (for example
        a ``RandomizedSearchCV`` or ``GridSearchCV``).

    Returns
    -------
    The ``best_estimator_`` of ``hp_search`` after fitting.
    """
    hp_search.fit(x_train, y_train)

    # Report from the search object that was passed in, not from a notebook
    # global, so the function works for any search instance.
    print("Best Score: {:.4f}".format(hp_search.best_score_))
    for k, v in hp_search.best_params_.items():
        print("{} => {}".format(k, v))
    print("Splits: {}".format(hp_search.n_splits_))

    # This accuracy is measured on the same data the search was fitted on,
    # so it is a training accuracy rather than a held-out estimate.
    y_out = hp_search.predict(x_train)
    print("Accuracy: {:.4f}%".format(np.mean(y_out == y_train) * 100.0))

    # The fitted attribute carries a trailing underscore.
    return hp_search.best_estimator_

In [None]:
# Decision tree: randomized hyper-parameter search on the SMOTE-resampled data
random_search_iterations = 1000

# Search space: lists/ranges are sampled uniformly from their elements,
# the scipy distribution is sampled through its rvs() method.
parameters = {
    'criterion': ["gini", "entropy"],
    'splitter': ["best", "random"],
    'max_depth': range(1, 10),
    # A float min_samples_split is interpreted as a fraction of the samples.
    'min_samples_split': uniform(loc=0, scale=1.0)
}

# train classifier for SMOTE data
# Seed both the tree and the search with the notebook's rand_seed so that the
# sampled candidates, the fitted trees and the printed scores are repeatable.
dt = tree.DecisionTreeClassifier(random_state=rand_seed)
rscv = RandomizedSearchCV(dt, parameters, cv=10, n_iter=random_search_iterations,
                          n_jobs=-1, random_state=rand_seed)
train_decision_tree(x_smote, y_smote, rscv)

# display confusion matrix
# The matrix is computed on x_train / y_train, the data the SMOTE set was
# resampled from, so it is not a held-out evaluation.
# NOTE(review): display_labels must follow the sorted order of the numeric
# class labels. This order differs from the `classes` list used in the
# tree-plot cell; confirm which mapping is correct and make them agree.
disp = plot_confusion_matrix(rscv, x_train, y_train,
                             display_labels=["Calm","Cheerful","Bravery","Fearful","Sadness","Love"],
                             cmap=plt.cm.Blues,
                             normalize='true')

In [None]:
import matplotlib.pyplot as plt

# NOTE(review): plot_tree expects class_names in ascending order of the
# numeric class labels. This order differs from the display_labels passed to
# the confusion matrix in the training cell; confirm which one is correct.
classes = ['Brave', 'Cheerful', 'Fearful', 'Love', 'Sadness', 'Calm']

# NOTE(review): this fits `dt`, the base estimator handed to the randomized
# search, on the original (non-resampled) data. It is not the tuned model;
# presumably rscv.best_estimator_ is what should be plotted - confirm intent.
dt.fit(x_train, y_train)

# Only the column names are needed, so read just the header row instead of
# parsing the whole dataset a second time.
features = list(pd.read_csv("Toy Dataset.csv", nrows=0))

fig, ax = plt.subplots(figsize=(20, 20))
# Columns 2..6 are the five feature columns used to build x_train.
treefig = tree.plot_tree(dt, class_names=classes, feature_names=features[2:7], fontsize=12, ax=ax)
plt.show()

In [None]:
# Print the installed scikit-learn version so the run can be reproduced.
# NOTE(review): the notebook imports sklearn.metrics.plot_confusion_matrix,
# which newer scikit-learn releases no longer provide (deprecated in 1.0,
# removed in 1.2 - verify against the release notes), so the version matters.
import sklearn
print(sklearn.__version__)