In [1]:
import numpy as np
import pandas as pd
import comonotonic as cm
import os
from sklearn.model_selection import train_test_split
import copy
import utils
import matplotlib.pyplot as plt

In [2]:
# Load the dataset from the Datasets/ folder.
# Assumption: the LAST column holds the class label. If the file's first
# column is an ID column, remove it before running this cell.
filename = "glass.csv"
df = pd.read_csv("Datasets/" + filename)

# Rename columns positionally: every feature becomes X0, X1, ... and the
# final (label) column becomes Y.
colnames = ['X' + str(j) for j in range(df.shape[1] - 1)] + ['Y']
df.columns = colnames
df

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,Y
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [3]:
# Continuous (original) feature values are used for the Naive Bayes baseline.
original_X = df[colnames[:-1]].to_numpy()
original_Y = df[colnames[-1]].to_numpy()
original_X_train, original_X_test, original_Y_train, original_Y_test = train_test_split(
    original_X, original_Y, test_size=0.2, random_state=42)

# Discretized copy of the data for the comonotonic classifiers: each feature
# is cut into `num_categories` equal-width bins labelled 0..num_categories-1.
# NOTE: the bin edges are computed on the full dataset, i.e. before the
# train/test split below.
df_copy = df.copy()
num_categories = 3
bin_labels = list(range(num_categories))
# Loop over every feature column (all columns except the label) rather than a
# hard-coded count, so the cell also works for datasets whose number of
# features differs from glass.csv.
for col in colnames[:-1]:
    df_copy[col] = pd.cut(df_copy[col], num_categories, labels=bin_labels)

# For pure comonotonic classifier.
# Same test_size and random_state as the split above, so both splits select
# the same rows for training and testing.
X = df_copy[colnames[:-1]].to_numpy()
Y = df_copy[colnames[-1]].to_numpy()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [4]:
# Pure comonotonic classifier on the discretized training split.
pure_como_classifier = cm.pure_comonotonic(
    X_train,
    Y_train,
    unrankable=None,
)
pure_como_classifier.run()

# Score the predictions on the held-out discretized test split.
Y_predict = pure_como_classifier.predict(X_test)
accuracy = utils.get_accuracy(Y_predict, Y_test)
accuracy

0.627906976744186

In [5]:
# Comparison with Gaussian Naive Bayes, trained on the original
# (non-discretized) feature values.
from sklearn.naive_bayes import GaussianNB

# fit() returns the estimator itself, so construction and fitting can be chained.
model = GaussianNB().fit(original_X_train, original_Y_train)
pred = model.predict(original_X_test)
utils.get_accuracy(pred, original_Y_test)

0.5581395348837209

In [6]:
# Clustered comonotonic classifier.
# The continuous (non-discretized) feature matrix is passed alongside the
# discretized training split.
# NOTE(review): uncategorized_x contains ALL rows (train + test), whereas
# X_train holds only the training rows -- confirm whether
# clustered_comonotonic expects row-aligned training data here.
uncategorized_x = df[colnames[:-1]].to_numpy()
cluster_como_classifier = cm.clustered_comonotonic(
    X_train,
    Y_train,
    unrankable=None,
    uncategorized_x=uncategorized_x,
    min_corr=0.45,
)
cluster_como_classifier.run()

# Score on the held-out discretized test split (overwrites Y_predict and
# accuracy from the pure comonotonic cell).
Y_predict = cluster_como_classifier.predict(X_test)
accuracy = utils.get_accuracy(Y_predict, Y_test)
accuracy

0.6511627906976745