In [None]:
# importing the libraries 
# make sure that tree_functions_2.py is in the same directory as this notebook

from tree_functions_2 import *

# calling the appropriate tools for classification

from sklearn import tree

from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier

from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, matthews_corrcoef, precision_score

from sklearn.metrics import classification_report, confusion_matrix

from sklearn.model_selection import train_test_split

import graphviz

In [None]:
# change figure configurations

%matplotlib inline

import matplotlib

font = {'size':16}

matplotlib.rc('font', **font)

matplotlib.rc('figure', figsize = (5.0, 5.0))

In [None]:
# the order of the trees 
# (passed to nx.nonisomorphic_trees and to get_stirling_trees further down)

n = 12

In [None]:
# setting a random seed
# NOTE(review): `random` is not imported in this notebook; presumably it is the
# standard-library module re-exported by `from tree_functions_2 import *` - confirm.
# The scikit-learn estimators below are seeded separately through `random_state`.

random.seed(42)

## Generating the Tree List and the Associated Data Frame

In [None]:
# creating the list of all non-isomorphic trees of order n
# (the networkx generator is materialised into a list)

Tree_List = list(nx.nonisomorphic_trees(n))

In [None]:
# classes for trees
# the order matters: classes[0] labels the first half of the ordering below, and the
# same list fixes the row/column order of every confusion matrix

classes = ['path-like', 'star-like']

In [None]:
# evaluation-based total ordering on Tree_List
# NOTE(review): get_total_list_evaluation_based comes from tree_functions_2; the
# arguments 2 and 1 presumably are the evaluation point of P(.;2,1) mentioned in the
# feature-table comment below - confirm.
# Later cells read entry[0] as the evaluated value and entry[1] as the tree.

total_tree_evaluation_list = get_total_list_evaluation_based(Tree_List, 2, 1)

In [None]:
# classifying the trees in Tree_List as 'path-like' and 'star-like'
# based on the total ordering: entries in the first half of the ordering are
# tagged with classes[0], all remaining entries with classes[1]

for j, ranked_entry in enumerate(total_tree_evaluation_list):

    in_first_half = 2 * j < len(total_tree_evaluation_list)

    ranked_entry.append(classes[0] if in_first_half else classes[1])

In [None]:
# data frame with one row per tree, containing the following graph statistics:
# log_{10}(P(.;2,1)), radius, diameter, degree centrality,
# closeness centrality, betweenness centrality,
# Stirling Numbers of the First Kind for Trees, number of leaves, and class

feature_columns = ['Log_Dist', 'Rad', 'Diam', 'Deg_Cent',
                   'Cls_Cent', 'Btw_Cent', 'Stirling', 'Leaf_Num', 'Class']

feature_rows = []

for ranked_entry in total_tree_evaluation_list:

    # entry layout used here: [0] evaluated value, [1] the tree, [3] the class label
    evaluation, tree_graph, class_label = ranked_entry[0], ranked_entry[1], ranked_entry[3]

    feature_rows.append([np.log10(float(evaluation)),
                         nx.radius(tree_graph),
                         nx.diameter(tree_graph),
                         get_degree_centrality(tree_graph),
                         get_closeness_centrality(tree_graph),
                         get_betweenness_centrality(tree_graph),
                         get_stirling_trees(tree_graph, n),
                         get_leaf_number(tree_graph),
                         class_label])

df = pd.DataFrame(feature_rows, columns = feature_columns)

In [None]:
# preview the first rows of the feature table
df.head()

## Splitting the Tree List to Train and Test Sets

In [None]:
# expand the 'Stirling' entries into a table and keep its columns 1 to 4 as features
stirling_table = pd.DataFrame(np.array(df['Stirling'].tolist()))

X = stirling_table.iloc[:, 1:5]

In [None]:
# closeness and betweenness centrality, each as its own data frame
X_1 = pd.DataFrame(np.array(df['Cls_Cent'].tolist()))

X_2 = pd.DataFrame(np.array(df['Btw_Cent'].tolist()))

In [None]:
# put the two centrality frames side by side and name the resulting columns
centrality_frames = [X_1, X_2]

X_3 = pd.concat(centrality_frames, axis = 'columns')

X_3.columns = ['Cls_Cent', 'Btw_Cent']

In [None]:
# append the centrality columns to the Stirling columns
# centrality columns left in X by an earlier run of this cell are dropped first,
# so re-running the cell does not keep widening the feature matrix
X = pd.concat([X.drop(columns = list(X_3.columns), errors = 'ignore'), X_3], axis = 1)

In [None]:
# class labels as a flat one-dimensional array
y = np.array(df['Class'])

In [None]:
# half of the trees go to the training set and half to the test set (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.5,
    random_state = 42,
)

In [None]:
# convert both feature splits to plain NumPy arrays
X_train, X_test = np.array(X_train), np.array(X_test)

## Training and Testing Classifiers

In [None]:
# setting a random seed
# An integer seed is used instead of one shared np.random.RandomState instance:
# with a shared instance every fit advances the generator, so a model's result
# depends on how many fits ran before it, and the trees refit along a pruning path
# are not grown from the same random draws as the tree the path was computed from.
# With an integer every estimator starts from the same seed each time it is fitted.

random_state = 42

In [None]:
# one summary row per fitted model is appended below:
# [model tag, criterion, 0/1 flag, train score, test score, train MCC, test MCC]
DF = []

## Trees

### Decision Tree

#### Decision Tree with Gini Criterion

In [None]:
# decision tree with gini (all other hyperparameters keep their scikit-learn defaults)

dtc = DecisionTreeClassifier(random_state = random_state)

dtc.fit(X_train, y_train)

y_train_dtc_pred = dtc.predict(X_train)

y_test_dtc_pred = dtc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', dtc.score(X_train, y_train))

print('Test Score:', dtc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_dtc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_dtc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_dtc = confusion_matrix(y_train, y_train_dtc_pred, labels = classes)

cm_test_dtc = confusion_matrix(y_test, y_test_dtc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_dtc), ('Test', cm_test_dtc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_dtc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_dtc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted tree, see
# https://scikit-learn.org/stable/modules/tree.html#tree

#tree_data = tree.export_graphviz(dtc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph  

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['dtc', 'gini', 0]
          + [round(dtc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_dtc_pred), (y_test, y_test_dtc_pred))])

In [None]:
# from https://scikit-learn.org/stable/auto_examples/tree/plot_cost_complexity_pruning.html
# effective alphas of the cost-complexity pruning path and the leaf impurity at each

path = dtc.cost_complexity_pruning_path(X_train, y_train)

ccp_alphas = path.ccp_alphas

impurities = path.impurities

In [None]:
fig, ax = plt.subplots()

# every (alpha, impurity) pair on the pruning path except the last one
ax.plot(ccp_alphas[:-1], impurities[:-1], drawstyle = 'steps-post', marker = 'o')

ax.set(xlabel = 'effective alpha', ylabel = 'total impurity of leaves')

ax.set_title('Total Impurity vs effective alpha for training set')

In [None]:
# refit one gini tree for every effective alpha on the pruning path
# the comprehension keeps its loop variable local, so the `dtc` fitted and evaluated
# above is not overwritten by the last tree of the path
dtcs = [DecisionTreeClassifier(random_state = random_state, ccp_alpha = ccp_alpha).fit(X_train, y_train)
        for ccp_alpha in ccp_alphas]

In [None]:
# drop the last tree / alpha of the pruning path before plotting
# NOTE: this shortens dtcs and ccp_alphas in place, so run the cell once per path
dtcs = dtcs[:-1]

ccp_alphas = ccp_alphas[:-1]

node_counts = [fitted.tree_.node_count for fitted in dtcs]

depth = [fitted.tree_.max_depth for fitted in dtcs]

fig, ax = plt.subplots(2, 1)

# top panel: tree size, bottom panel: tree depth, both against alpha
panel_specs = ((node_counts, 'number of nodes', 'Number of nodes vs alpha'),
               (depth, 'depth of tree', 'Depth vs alpha'))

for panel, (series, y_label, panel_title) in zip(ax, panel_specs):

    panel.plot(ccp_alphas, series, marker = 'o', drawstyle = 'steps-post')

    panel.set_xlabel('alpha')

    panel.set_ylabel(y_label)

    panel.set_title(panel_title)

fig.tight_layout()

In [None]:
# accuracy of every tree on the pruning path, on both splits
train_scores = [fitted.score(X_train, y_train) for fitted in dtcs]

test_scores = [fitted.score(X_test, y_test) for fitted in dtcs]

fig, ax = plt.subplots()

for split_label, split_scores in (('train', train_scores), ('test', test_scores)):

    ax.plot(ccp_alphas, split_scores, marker = 'o', label = split_label, drawstyle = 'steps-post')

ax.set_xlabel('alpha')

ax.set_ylabel('accuracy')

ax.set_title('Accuracy vs alpha for training and testing sets')

ax.legend()

plt.show()

In [None]:
# decision tree with gini and pruning
# (ccp_alpha is hard-coded; presumably read off the accuracy-vs-alpha plot above - confirm)

dtc = DecisionTreeClassifier(random_state = random_state, ccp_alpha = 0.01)

dtc.fit(X_train, y_train)

y_train_dtc_pred = dtc.predict(X_train)

y_test_dtc_pred = dtc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', dtc.score(X_train, y_train))

print('Test Score:', dtc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_dtc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_dtc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_dtc = confusion_matrix(y_train, y_train_dtc_pred, labels = classes)

cm_test_dtc = confusion_matrix(y_test, y_test_dtc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_dtc), ('Test', cm_test_dtc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_dtc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_dtc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted tree, see
# https://scikit-learn.org/stable/modules/tree.html#tree

#tree_data = tree.export_graphviz(dtc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph 

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['dtc', 'gini', 1]
          + [round(dtc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_dtc_pred), (y_test, y_test_dtc_pred))])

#### Decision Tree with Entropy Criterion

In [None]:
# decision tree with entropy (all other hyperparameters keep their scikit-learn defaults)

dtc = DecisionTreeClassifier(random_state = random_state, criterion = 'entropy')

dtc.fit(X_train, y_train)

y_train_dtc_pred = dtc.predict(X_train)

y_test_dtc_pred = dtc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', dtc.score(X_train, y_train))

print('Test Score:', dtc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_dtc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_dtc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_dtc = confusion_matrix(y_train, y_train_dtc_pred, labels = classes)

cm_test_dtc = confusion_matrix(y_test, y_test_dtc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_dtc), ('Test', cm_test_dtc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_dtc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_dtc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted tree

#tree_data = tree.export_graphviz(dtc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph  

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['dtc', 'entropy', 0]
          + [round(dtc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_dtc_pred), (y_test, y_test_dtc_pred))])

In [None]:
# effective alphas of the cost-complexity pruning path and the leaf impurity at each
path = dtc.cost_complexity_pruning_path(X_train, y_train)

ccp_alphas = path.ccp_alphas

impurities = path.impurities

In [None]:
fig, ax = plt.subplots()

# every (alpha, impurity) pair on the pruning path except the last one
ax.plot(ccp_alphas[:-1], impurities[:-1], drawstyle = 'steps-post', marker = 'o')

ax.set(xlabel = 'effective alpha', ylabel = 'total impurity of leaves')

ax.set_title('Total Impurity vs effective alpha for training set')

In [None]:
# refit one entropy tree for every effective alpha on the pruning path
# criterion = 'entropy' must be repeated here: the alphas were computed from the
# entropy tree above, and without it the refits would silently fall back to gini.
# The comprehension keeps its loop variable local, so the `dtc` fitted and
# evaluated above is not overwritten by the last tree of the path.
dtcs = [DecisionTreeClassifier(random_state = random_state, criterion = 'entropy',
                               ccp_alpha = ccp_alpha).fit(X_train, y_train)
        for ccp_alpha in ccp_alphas]

In [None]:
# drop the last tree / alpha of the pruning path before plotting
# NOTE: this shortens dtcs and ccp_alphas in place, so run the cell once per path
dtcs = dtcs[:-1]

ccp_alphas = ccp_alphas[:-1]

node_counts = [fitted.tree_.node_count for fitted in dtcs]

depth = [fitted.tree_.max_depth for fitted in dtcs]

fig, ax = plt.subplots(2, 1)

# top panel: tree size, bottom panel: tree depth, both against alpha
panel_specs = ((node_counts, 'number of nodes', 'Number of nodes vs alpha'),
               (depth, 'depth of tree', 'Depth vs alpha'))

for panel, (series, y_label, panel_title) in zip(ax, panel_specs):

    panel.plot(ccp_alphas, series, marker = 'o', drawstyle = 'steps-post')

    panel.set_xlabel('alpha')

    panel.set_ylabel(y_label)

    panel.set_title(panel_title)

fig.tight_layout()

In [None]:
# accuracy of every tree on the pruning path, on both splits
train_scores = [fitted.score(X_train, y_train) for fitted in dtcs]

test_scores = [fitted.score(X_test, y_test) for fitted in dtcs]

fig, ax = plt.subplots()

for split_label, split_scores in (('train', train_scores), ('test', test_scores)):

    ax.plot(ccp_alphas, split_scores, marker = 'o', label = split_label, drawstyle = 'steps-post')

ax.set_xlabel('alpha')

ax.set_ylabel('accuracy')

ax.set_title('Accuracy vs alpha for training and testing sets')

ax.legend()

plt.show()

In [None]:
# decision tree with entropy and pruning
# (ccp_alpha is hard-coded; presumably read off the accuracy-vs-alpha plot above - confirm)

dtc = DecisionTreeClassifier(random_state = random_state, criterion = 'entropy', ccp_alpha = 0.03)

dtc.fit(X_train, y_train)

y_train_dtc_pred = dtc.predict(X_train)

y_test_dtc_pred = dtc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', dtc.score(X_train, y_train))

print('Test Score:', dtc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_dtc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_dtc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_dtc = confusion_matrix(y_train, y_train_dtc_pred, labels = classes)

cm_test_dtc = confusion_matrix(y_test, y_test_dtc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_dtc), ('Test', cm_test_dtc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_dtc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_dtc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted tree

#tree_data = tree.export_graphviz(dtc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph  

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['dtc', 'entropy', 1]
          + [round(dtc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_dtc_pred), (y_test, y_test_dtc_pred))])

### Extra Tree

#### Extra Tree with Gini Criterion

In [None]:
# extra tree with gini (all other hyperparameters keep their scikit-learn defaults)

etc = ExtraTreeClassifier(random_state = random_state)

etc.fit(X_train, y_train)

y_train_etc_pred = etc.predict(X_train)

y_test_etc_pred = etc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', etc.score(X_train, y_train))

print('Test Score:', etc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_etc = confusion_matrix(y_train, y_train_etc_pred, labels = classes)

cm_test_etc = confusion_matrix(y_test, y_test_etc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_etc), ('Test', cm_test_etc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_etc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_etc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted extra tree

#tree_data = tree.export_graphviz(etc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph 

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['etc', 'gini', 0]
          + [round(etc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_etc_pred), (y_test, y_test_etc_pred))])

In [None]:
# effective alphas of the cost-complexity pruning path and the leaf impurity at each
path = etc.cost_complexity_pruning_path(X_train, y_train)

ccp_alphas = path.ccp_alphas

impurities = path.impurities

In [None]:
fig, ax = plt.subplots()

# every (alpha, impurity) pair on the pruning path except the last one
ax.plot(ccp_alphas[:-1], impurities[:-1], drawstyle = 'steps-post', marker = 'o')

ax.set(xlabel = 'effective alpha', ylabel = 'total impurity of leaves')

ax.set_title('Total Impurity vs effective alpha for training set')

In [None]:
# refit one gini extra tree for every effective alpha on the pruning path
# the comprehension keeps its loop variable local, so the `etc` fitted and evaluated
# above is not overwritten by the last tree of the path
etcs = [ExtraTreeClassifier(random_state = random_state, ccp_alpha = ccp_alpha).fit(X_train, y_train)
        for ccp_alpha in ccp_alphas]

In [None]:
# drop the last tree / alpha of the pruning path before plotting
# NOTE: this shortens etcs and ccp_alphas in place, so run the cell once per path
etcs = etcs[:-1]

ccp_alphas = ccp_alphas[:-1]

node_counts = [fitted.tree_.node_count for fitted in etcs]

depth = [fitted.tree_.max_depth for fitted in etcs]

fig, ax = plt.subplots(2, 1)

# top panel: tree size, bottom panel: tree depth, both against alpha
panel_specs = ((node_counts, 'number of nodes', 'Number of nodes vs alpha'),
               (depth, 'depth of tree', 'Depth vs alpha'))

for panel, (series, y_label, panel_title) in zip(ax, panel_specs):

    panel.plot(ccp_alphas, series, marker = 'o', drawstyle = 'steps-post')

    panel.set_xlabel('alpha')

    panel.set_ylabel(y_label)

    panel.set_title(panel_title)

fig.tight_layout()

In [None]:
# accuracy of every tree on the pruning path, on both splits
train_scores = [fitted.score(X_train, y_train) for fitted in etcs]

test_scores = [fitted.score(X_test, y_test) for fitted in etcs]

fig, ax = plt.subplots()

for split_label, split_scores in (('train', train_scores), ('test', test_scores)):

    ax.plot(ccp_alphas, split_scores, marker = 'o', label = split_label, drawstyle = 'steps-post')

ax.set_xlabel('alpha')

ax.set_ylabel('accuracy')

ax.set_title('Accuracy vs alpha for training and testing sets')

ax.legend()

plt.show()

In [None]:
# extra tree with gini and pruning
# ExtraTreeClassifier is required here: this cell previously built a
# DecisionTreeClassifier, so the 'etc' / 'gini' / pruned summary row described the wrong model
# (ccp_alpha is hard-coded; presumably read off the accuracy-vs-alpha plot above - confirm)

etc = ExtraTreeClassifier(random_state = random_state, ccp_alpha = 0.01)

etc.fit(X_train, y_train)

y_train_etc_pred = etc.predict(X_train)

y_test_etc_pred = etc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', etc.score(X_train, y_train))

print('Test Score:', etc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_etc = confusion_matrix(y_train, y_train_etc_pred, labels = classes)

cm_test_etc = confusion_matrix(y_test, y_test_etc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_etc), ('Test', cm_test_etc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_etc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_etc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted extra tree

#tree_data = tree.export_graphviz(etc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph  

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['etc', 'gini', 1]
          + [round(etc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_etc_pred), (y_test, y_test_etc_pred))])

#### Extra Tree with Entropy Criterion

In [None]:
# extra tree with entropy (all other hyperparameters keep their scikit-learn defaults)

etc = ExtraTreeClassifier(random_state = random_state, criterion = 'entropy')

etc.fit(X_train, y_train)

y_train_etc_pred = etc.predict(X_train)

y_test_etc_pred = etc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', etc.score(X_train, y_train))

print('Test Score:', etc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_etc = confusion_matrix(y_train, y_train_etc_pred, labels = classes)

cm_test_etc = confusion_matrix(y_test, y_test_etc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_etc), ('Test', cm_test_etc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_etc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_etc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted extra tree

#tree_data = tree.export_graphviz(etc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph 

In [None]:
# summary row: model tag, criterion, flag, then train/test accuracy and
# train/test Matthews correlation, each rounded to 5 decimals
DF.append(['etc', 'entropy', 0]
          + [round(etc.score(features, labels), 5)
             for features, labels in ((X_train, y_train), (X_test, y_test))]
          + [round(matthews_corrcoef(labels, predicted), 5)
             for labels, predicted in ((y_train, y_train_etc_pred), (y_test, y_test_etc_pred))])

In [None]:
# effective alphas of the cost-complexity pruning path and the leaf impurity at each
path = etc.cost_complexity_pruning_path(X_train, y_train)

ccp_alphas = path.ccp_alphas

impurities = path.impurities

In [None]:
fig, ax = plt.subplots()

# every (alpha, impurity) pair on the pruning path except the last one
ax.plot(ccp_alphas[:-1], impurities[:-1], drawstyle = 'steps-post', marker = 'o')

ax.set(xlabel = 'effective alpha', ylabel = 'total impurity of leaves')

ax.set_title('Total Impurity vs effective alpha for training set')

In [None]:
# refit one entropy extra tree for every effective alpha on the pruning path
# criterion = 'entropy' must be repeated here: the alphas were computed from the
# entropy tree above, and without it the refits would silently fall back to gini.
# The comprehension keeps its loop variable local, so the `etc` fitted and
# evaluated above is not overwritten by the last tree of the path.
etcs = [ExtraTreeClassifier(random_state = random_state, criterion = 'entropy',
                            ccp_alpha = ccp_alpha).fit(X_train, y_train)
        for ccp_alpha in ccp_alphas]

In [None]:
# drop the last tree / alpha of the pruning path before plotting
# NOTE: this shortens etcs and ccp_alphas in place, so run the cell once per path
etcs = etcs[:-1]

ccp_alphas = ccp_alphas[:-1]

node_counts = [fitted.tree_.node_count for fitted in etcs]

depth = [fitted.tree_.max_depth for fitted in etcs]

fig, ax = plt.subplots(2, 1)

# top panel: tree size, bottom panel: tree depth, both against alpha
panel_specs = ((node_counts, 'number of nodes', 'Number of nodes vs alpha'),
               (depth, 'depth of tree', 'Depth vs alpha'))

for panel, (series, y_label, panel_title) in zip(ax, panel_specs):

    panel.plot(ccp_alphas, series, marker = 'o', drawstyle = 'steps-post')

    panel.set_xlabel('alpha')

    panel.set_ylabel(y_label)

    panel.set_title(panel_title)

fig.tight_layout()

In [None]:
# accuracy of every tree on the pruning path, on both splits
train_scores = [fitted.score(X_train, y_train) for fitted in etcs]

test_scores = [fitted.score(X_test, y_test) for fitted in etcs]

fig, ax = plt.subplots()

for split_label, split_scores in (('train', train_scores), ('test', test_scores)):

    ax.plot(ccp_alphas, split_scores, marker = 'o', label = split_label, drawstyle = 'steps-post')

ax.set_xlabel('alpha')

ax.set_ylabel('accuracy')

ax.set_title('Accuracy vs alpha for training and testing sets')

ax.legend()

plt.show()

In [None]:
# extra tree with entropy and pruning
# (ccp_alpha is hard-coded; presumably read off the accuracy-vs-alpha plot above - confirm)

etc = ExtraTreeClassifier(random_state = random_state, criterion = 'entropy', ccp_alpha = 0.02)

etc.fit(X_train, y_train)

y_train_etc_pred = etc.predict(X_train)

y_test_etc_pred = etc.predict(X_test)

# accuracy and Matthews correlation on both splits
print('Train Score:', etc.score(X_train, y_train))

print('Test Score:', etc.score(X_test, y_test))

print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etc_pred))

print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etc_pred))

# rows are true labels, columns are predictions, both ordered as in `classes`
cm_train_etc = confusion_matrix(y_train, y_train_etc_pred, labels = classes)

cm_test_etc = confusion_matrix(y_test, y_test_etc_pred, labels = classes)

for split_name, split_cm in (('Train', cm_train_etc), ('Test', cm_test_etc)):

    print('--------')

    print(split_name + ' Confusion Matrix:')

    # tick labels show the class names instead of the positional 0/1
    fig = sns.heatmap(split_cm, annot = True, fmt = 'd', cbar = False,
                      xticklabels = classes, yticklabels = classes)

    plt.xlabel('Predicted')

    plt.ylabel('True')

    plt.show()

print('--------')

print('Test Classification Report:')

print(classification_report(y_test, y_test_etc_pred))

# the same report as a data frame; the bare name below renders it as the cell output
report = classification_report(y_test, y_test_etc_pred, output_dict = True)

report1 = pd.DataFrame(report).transpose()

report1

# optional rendering of the fitted extra tree

#tree_data = tree.export_graphviz(etc, out_file = None) 

#graph = graphviz.Source(tree_data) 

#graph  

In [None]:
# summary row for the pruned entropy extra tree:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: this appends unconditionally, so re-running the cell adds a duplicate row
etc_metrics = (etc.score(X_train, y_train),
               etc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_etc_pred),
               matthews_corrcoef(y_test, y_test_etc_pred))

DF.append(['etc', 'entropy', 1] + [round(value, 5) for value in etc_metrics])

## Ensembles

### Bagging

In [None]:
# bagging
#
# fits a bagging ensemble with default settings (only the seed is fixed), then reports
# accuracy, Matthews correlation, labelled confusion matrices and the test classification report

bc = BaggingClassifier(random_state = random_state)

# reference signature with the library defaults
# (parameter names may differ between scikit-learn releases):
# BaggingClassifier(base_estimator=None, n_estimators=10, max_samples=1.0, max_features=1.0, 
#                  bootstrap=True, bootstrap_features=False, oob_score=False, warm_start=False, 
#                 n_jobs=None, random_state=None, verbose=0)

bc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_bc_pred = bc.predict(X_train)
y_test_bc_pred = bc.predict(X_test)

print('Train Score:', bc.score(X_train, y_train))
print('Test Score:', bc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_bc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_bc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_bc = confusion_matrix(y_train, y_train_bc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_bc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_bc = confusion_matrix(y_test, y_test_bc_pred, labels = classes)

ax = sns.heatmap(cm_test_bc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_bc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_bc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the bagging ensemble; criterion and pruning do not apply ('na'):
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: this appends unconditionally, so re-running the cell adds a duplicate row
bc_metrics = (bc.score(X_train, y_train),
              bc.score(X_test, y_test),
              matthews_corrcoef(y_train, y_train_bc_pred),
              matthews_corrcoef(y_test, y_test_bc_pred))

DF.append(['bc', 'na', 'na'] + [round(value, 5) for value in bc_metrics])

### Random Forest

#### Random Forest with Gini Criterion

In [None]:
# random forest with gini
#
# fits an unpruned 50-tree forest (the reference signature below defaults to 100), then
# reports accuracy, Matthews correlation, labelled confusion matrices and the test
# classification report

rfc = RandomForestClassifier(n_estimators = 50, random_state = random_state)

# reference signature with the library defaults:
# RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                       bootstrap=True, oob_score=False, n_jobs=None, 
#                       random_state=None, verbose=0, warm_start=False, 
#                       class_weight=None, ccp_alpha=0.0, max_samples=None)

rfc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_rfc_pred = rfc.predict(X_train)
y_test_rfc_pred = rfc.predict(X_test)

print('Train Score:', rfc.score(X_train, y_train))
print('Test Score:', rfc.score(X_test, y_test))

# Matthews correlation is printed here as in every other model cell of the notebook
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_rfc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_rfc_pred))

print('---------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_rfc = confusion_matrix(y_train, y_train_rfc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_rfc = confusion_matrix(y_test, y_test_rfc_pred, labels = classes)

ax = sns.heatmap(cm_test_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_rfc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_rfc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the unpruned gini random forest:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `rfc` is rebound by every random-forest cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
rfc_metrics = (rfc.score(X_train, y_train),
               rfc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_rfc_pred),
               matthews_corrcoef(y_test, y_test_rfc_pred))

DF.append(['rfc', 'gini', 0] + [round(value, 5) for value in rfc_metrics])

In [None]:
# random forest with gini and pruning
#
# fits a 50-tree forest with cost-complexity pruning, then reports accuracy, Matthews
# correlation, labelled confusion matrices and the test classification report

# NOTE(review): ccp_alpha = 0.01 is hard-coded; document how it was selected
rfc = RandomForestClassifier(n_estimators = 50, random_state = random_state, ccp_alpha = 0.01)

# reference signature with the library defaults:
# RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                       bootstrap=True, oob_score=False, n_jobs=None, 
#                       random_state=None, verbose=0, warm_start=False, 
#                       class_weight=None, ccp_alpha=0.0, max_samples=None)

rfc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_rfc_pred = rfc.predict(X_train)
y_test_rfc_pred = rfc.predict(X_test)

print('Train Score:', rfc.score(X_train, y_train))
print('Test Score:', rfc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_rfc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_rfc_pred))

print('---------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_rfc = confusion_matrix(y_train, y_train_rfc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_rfc = confusion_matrix(y_test, y_test_rfc_pred, labels = classes)

ax = sns.heatmap(cm_test_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_rfc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_rfc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the pruned gini random forest:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `rfc` is rebound by every random-forest cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
rfc_metrics = (rfc.score(X_train, y_train),
               rfc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_rfc_pred),
               matthews_corrcoef(y_test, y_test_rfc_pred))

DF.append(['rfc', 'gini', 1] + [round(value, 5) for value in rfc_metrics])

#### Random Forest with Entropy Criterion

In [None]:
# random forest with entropy
#
# fits an unpruned 50-tree forest using the entropy criterion, then reports accuracy,
# Matthews correlation, labelled confusion matrices and the test classification report

rfc = RandomForestClassifier(n_estimators = 50, random_state = random_state, criterion = 'entropy')

# reference signature with the library defaults:
# RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                       bootstrap=True, oob_score=False, n_jobs=None, 
#                       random_state=None, verbose=0, warm_start=False, 
#                       class_weight=None, ccp_alpha=0.0, max_samples=None)

rfc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_rfc_pred = rfc.predict(X_train)
y_test_rfc_pred = rfc.predict(X_test)

print('Train Score:', rfc.score(X_train, y_train))
print('Test Score:', rfc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_rfc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_rfc_pred))

print('---------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_rfc = confusion_matrix(y_train, y_train_rfc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_rfc = confusion_matrix(y_test, y_test_rfc_pred, labels = classes)

ax = sns.heatmap(cm_test_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_rfc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_rfc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the unpruned entropy random forest:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `rfc` is rebound by every random-forest cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
rfc_metrics = (rfc.score(X_train, y_train),
               rfc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_rfc_pred),
               matthews_corrcoef(y_test, y_test_rfc_pred))

DF.append(['rfc', 'entropy', 0] + [round(value, 5) for value in rfc_metrics])

In [None]:
# random forest with entropy and pruning
#
# fits a 50-tree entropy forest with cost-complexity pruning, then reports accuracy,
# Matthews correlation, labelled confusion matrices and the test classification report

# NOTE(review): ccp_alpha = 0.03 is hard-coded; document how it was selected
rfc = RandomForestClassifier(n_estimators = 50, random_state = random_state, criterion = 'entropy',
                             ccp_alpha = 0.03)

# reference signature with the library defaults:
# RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                       bootstrap=True, oob_score=False, n_jobs=None, 
#                       random_state=None, verbose=0, warm_start=False, 
#                       class_weight=None, ccp_alpha=0.0, max_samples=None)

rfc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_rfc_pred = rfc.predict(X_train)
y_test_rfc_pred = rfc.predict(X_test)

print('Train Score:', rfc.score(X_train, y_train))
print('Test Score:', rfc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_rfc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_rfc_pred))

print('---------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_rfc = confusion_matrix(y_train, y_train_rfc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_rfc = confusion_matrix(y_test, y_test_rfc_pred, labels = classes)

ax = sns.heatmap(cm_test_rfc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_rfc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_rfc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the pruned entropy random forest:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `rfc` is rebound by every random-forest cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
rfc_metrics = (rfc.score(X_train, y_train),
               rfc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_rfc_pred),
               matthews_corrcoef(y_test, y_test_rfc_pred))

DF.append(['rfc', 'entropy', 1] + [round(value, 5) for value in rfc_metrics])

### Extra Trees 

#### Extra Trees with Gini Criterion

In [None]:
# extra trees with gini
#
# fits an unpruned 50-tree extra-trees ensemble (the reference signature below defaults
# to 100), then reports accuracy, Matthews correlation, labelled confusion matrices and
# the test classification report

etsc = ExtraTreesClassifier(n_estimators = 50, random_state = random_state)

# reference signature with the library defaults:
# ExtraTreesClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                     max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                     bootstrap=False, oob_score=False, n_jobs=None, 
#                     random_state=None, verbose=0, warm_start=False, class_weight=None, 
#                     ccp_alpha=0.0, max_samples=None)

etsc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_etsc_pred = etsc.predict(X_train)
y_test_etsc_pred = etsc.predict(X_test)

print('Train Score:', etsc.score(X_train, y_train))
print('Test Score:', etsc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etsc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etsc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_etsc = confusion_matrix(y_train, y_train_etsc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_etsc = confusion_matrix(y_test, y_test_etsc_pred, labels = classes)

ax = sns.heatmap(cm_test_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_etsc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_etsc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the unpruned gini extra-trees ensemble:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `etsc` is rebound by every extra-trees cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
etsc_metrics = (etsc.score(X_train, y_train),
                etsc.score(X_test, y_test),
                matthews_corrcoef(y_train, y_train_etsc_pred),
                matthews_corrcoef(y_test, y_test_etsc_pred))

DF.append(['etsc', 'gini', 0] + [round(value, 5) for value in etsc_metrics])

In [None]:
# extra trees with gini and pruning
#
# fits a 50-tree extra-trees ensemble with cost-complexity pruning, then reports accuracy,
# Matthews correlation, labelled confusion matrices and the test classification report

# NOTE(review): ccp_alpha = 0.01 is hard-coded; document how it was selected
etsc = ExtraTreesClassifier(n_estimators = 50, random_state = random_state, ccp_alpha = 0.01)

# reference signature with the library defaults:
# ExtraTreesClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                     max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                     bootstrap=False, oob_score=False, n_jobs=None, 
#                     random_state=None, verbose=0, warm_start=False, class_weight=None, 
#                     ccp_alpha=0.0, max_samples=None)

etsc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_etsc_pred = etsc.predict(X_train)
y_test_etsc_pred = etsc.predict(X_test)

print('Train Score:', etsc.score(X_train, y_train))
print('Test Score:', etsc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etsc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etsc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_etsc = confusion_matrix(y_train, y_train_etsc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_etsc = confusion_matrix(y_test, y_test_etsc_pred, labels = classes)

ax = sns.heatmap(cm_test_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_etsc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_etsc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the pruned gini extra-trees ensemble:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `etsc` is rebound by every extra-trees cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
etsc_metrics = (etsc.score(X_train, y_train),
                etsc.score(X_test, y_test),
                matthews_corrcoef(y_train, y_train_etsc_pred),
                matthews_corrcoef(y_test, y_test_etsc_pred))

DF.append(['etsc', 'gini', 1] + [round(value, 5) for value in etsc_metrics])

#### Extra Trees with Entropy Criterion

In [None]:
# extra trees with entropy
#
# fits an unpruned 50-tree extra-trees ensemble using the entropy criterion, then reports
# accuracy, Matthews correlation, labelled confusion matrices and the test classification report

etsc = ExtraTreesClassifier(n_estimators = 50, random_state = random_state, criterion = 'entropy')

# reference signature with the library defaults:
# ExtraTreesClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                     max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                     bootstrap=False, oob_score=False, n_jobs=None, 
#                     random_state=None, verbose=0, warm_start=False, class_weight=None, 
#                     ccp_alpha=0.0, max_samples=None)

etsc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_etsc_pred = etsc.predict(X_train)
y_test_etsc_pred = etsc.predict(X_test)

print('Train Score:', etsc.score(X_train, y_train))
print('Test Score:', etsc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etsc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etsc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_etsc = confusion_matrix(y_train, y_train_etsc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_etsc = confusion_matrix(y_test, y_test_etsc_pred, labels = classes)

ax = sns.heatmap(cm_test_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_etsc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_etsc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the unpruned entropy extra-trees ensemble:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `etsc` is rebound by every extra-trees cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
etsc_metrics = (etsc.score(X_train, y_train),
                etsc.score(X_test, y_test),
                matthews_corrcoef(y_train, y_train_etsc_pred),
                matthews_corrcoef(y_test, y_test_etsc_pred))

DF.append(['etsc', 'entropy', 0] + [round(value, 5) for value in etsc_metrics])

In [None]:
# extra trees with entropy and pruning
#
# fits a 50-tree entropy extra-trees ensemble with cost-complexity pruning, then reports
# accuracy, Matthews correlation, labelled confusion matrices and the test classification report

# NOTE(review): ccp_alpha = 0.02 is hard-coded; document how it was selected
etsc = ExtraTreesClassifier(n_estimators = 50, random_state = random_state, criterion = 'entropy', 
                           ccp_alpha = 0.02)

# reference signature with the library defaults:
# ExtraTreesClassifier(n_estimators=100, *, criterion='gini', max_depth=None, 
#                     min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
#                     max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
#                     bootstrap=False, oob_score=False, n_jobs=None, 
#                     random_state=None, verbose=0, warm_start=False, class_weight=None, 
#                     ccp_alpha=0.0, max_samples=None)

etsc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_etsc_pred = etsc.predict(X_train)
y_test_etsc_pred = etsc.predict(X_test)

print('Train Score:', etsc.score(X_train, y_train))
print('Test Score:', etsc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_etsc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_etsc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_etsc = confusion_matrix(y_train, y_train_etsc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_etsc = confusion_matrix(y_test, y_test_etsc_pred, labels = classes)

ax = sns.heatmap(cm_test_etsc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_etsc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_etsc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the pruned entropy extra-trees ensemble:
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: `etsc` is rebound by every extra-trees cell, so run this right after the fit
# above; it also appends unconditionally, so re-running adds a duplicate row
etsc_metrics = (etsc.score(X_train, y_train),
                etsc.score(X_test, y_test),
                matthews_corrcoef(y_train, y_train_etsc_pred),
                matthews_corrcoef(y_test, y_test_etsc_pred))

DF.append(['etsc', 'entropy', 1] + [round(value, 5) for value in etsc_metrics])

## Support Vector Machines

In [None]:
# Linear SVC
#
# fits a support vector classifier with a linear kernel, then reports accuracy, Matthews
# correlation, labelled confusion matrices and the test classification report

# per the scikit-learn docs, random_state only affects the probability estimates
# (probability = True); it is passed here for uniformity with the other models
svc = SVC(kernel = 'linear', random_state = random_state)

# reference signature with the library defaults:
# SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, 
#    probability=False, tol=0.001, cache_size=200, class_weight=None, 
#    verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)

svc.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_svc_pred = svc.predict(X_train)
y_test_svc_pred = svc.predict(X_test)

print('Train Score:', svc.score(X_train, y_train))
print('Test Score:', svc.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_svc_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_svc_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_svc = confusion_matrix(y_train, y_train_svc_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_svc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_svc = confusion_matrix(y_test, y_test_svc_pred, labels = classes)

ax = sns.heatmap(cm_test_svc, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_svc_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_svc_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the linear SVC; criterion and pruning do not apply ('na'):
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: this appends unconditionally, so re-running the cell adds a duplicate row
svc_metrics = (svc.score(X_train, y_train),
               svc.score(X_test, y_test),
               matthews_corrcoef(y_train, y_train_svc_pred),
               matthews_corrcoef(y_test, y_test_svc_pred))

DF.append(['svc', 'na', 'na'] + [round(value, 5) for value in svc_metrics])

In [None]:
# Quadratic SVC
#
# fits a support vector classifier with a degree-2 polynomial kernel, then reports accuracy,
# Matthews correlation, labelled confusion matrices and the test classification report

# gamma = 'auto' overrides the 'scale' default shown in the reference signature below;
# per the scikit-learn docs, random_state only affects the probability estimates
# (probability = True) and is passed here for uniformity with the other models
svc2 = SVC(kernel = 'poly', degree = 2, random_state = random_state, gamma = 'auto')

# reference signature with the library defaults:
# SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, 
#    probability=False, tol=0.001, cache_size=200, class_weight=None, 
#    verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)

svc2.fit(X_train, y_train)

# predictions are kept in variables: they feed the correlations, the confusion
# matrices, the report and the summary row appended in the next cell
y_train_svc2_pred = svc2.predict(X_train)
y_test_svc2_pred = svc2.predict(X_test)

print('Train Score:', svc2.score(X_train, y_train))
print('Test Score:', svc2.score(X_test, y_test))
print('Train Matthews Corr:', matthews_corrcoef(y_train, y_train_svc2_pred))
print('Test Matthews Corr:', matthews_corrcoef(y_test, y_test_svc2_pred))

print('--------')
print('Train Confusion Matrix:')

# rows = true class, columns = predicted class, both in the order given by `classes`
cm_train_svc2 = confusion_matrix(y_train, y_train_svc2_pred, labels = classes)

# tick labels carry the class names so the matrix can be read without knowing the label order
ax = sns.heatmap(cm_train_svc2, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Confusion Matrix:')

cm_test_svc2 = confusion_matrix(y_test, y_test_svc2_pred, labels = classes)

ax = sns.heatmap(cm_test_svc2, annot = True, fmt = 'd', cbar = False,
                 xticklabels = classes, yticklabels = classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

print('--------')
print('Test Classification Report:')
print(classification_report(y_test, y_test_svc2_pred))

# the same report as a data frame (one row per class / average) for rich display
report = classification_report(y_test, y_test_svc2_pred, output_dict = True)
report1 = pd.DataFrame(report).transpose()

report1

In [None]:
# summary row for the quadratic SVC; criterion and pruning do not apply ('na'):
# [method, criterion, pruning flag, train accuracy, test accuracy, train MCC, test MCC]
# NOTE: this appends unconditionally, so re-running the cell adds a duplicate row
svc2_metrics = (svc2.score(X_train, y_train),
                svc2.score(X_test, y_test),
                matthews_corrcoef(y_train, y_train_svc2_pred),
                matthews_corrcoef(y_test, y_test_svc2_pred))

DF.append(['svc2', 'na', 'na'] + [round(value, 5) for value in svc2_metrics])

## Table

In [None]:
# assemble the per-model result rows collected in DF into one comparison table
#
# the rows are handed to pandas directly: routing them through np.array(DF) coerces
# every entry to a string (each row mixes text labels with numbers), which leaves the
# score and correlation columns as text that cannot be sorted or compared numerically
DF2 = pd.DataFrame(DF, columns = ['Method', 'Criterion', 'Pruning', 'TrainScore',
                                  'TestScore', 'TrainCorr', 'TestCorr'])

In [None]:
# display the assembled comparison table (one row per fitted model)
DF2