### Imports

In [None]:
import os
import math
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef
from sklearn.preprocessing import LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

### Functions

In [None]:
#Distance Function
def distance(metrics):
    """Return the Euclidean distance between *metrics* and the reference scores.

    Parameters
    ----------
    metrics : mapping
        Must provide the keys ``'accuracy'``, ``'sensitivity'``,
        ``'specificity'`` and ``'mcc'``.  The values are compared directly
        with the reference values below, which are expressed as fractions
        (e.g. ``0.7374``), not percentages.

    Returns
    -------
    float
        Square root of the summed squared differences over the four keys.

    Raises
    ------
    KeyError
        If one of the four keys is missing from *metrics*.
    """
    # Reference point every result is measured against.
    reference = {
        'accuracy': 0.7374,
        'sensitivity': 0.7465,
        'specificity': 0.7322,
        'mcc': 0.46,
    }

    # Accumulate in a fixed key order so the floating-point result is stable.
    squared_total = 0.0
    for key, target in reference.items():
        squared_total = squared_total + math.pow(metrics[key] - target, 2)

    return math.sqrt(squared_total)

#Calculate_Metrics
def calculate_metrics(tp,tn,fp,fn):
    """Compute accuracy, sensitivity, specificity and MCC from confusion counts.

    Parameters
    ----------
    tp, tn, fp, fn : int, float or NumPy scalar
        True-positive, true-negative, false-positive and false-negative
        counts (fractional values, e.g. averaged counts, are accepted).

    Returns
    -------
    list
        ``[accuracy, sensitivity, specificity, mcc]``, each multiplied by 100
        and rounded to two decimals.  A metric whose denominator is zero is
        reported as ``0`` instead of raising or producing NaN.
    """
    # Work in Python floats.  NumPy fixed-width integers wrap around silently,
    # and the four-factor MCC denominator exceeds 64 bits as soon as the
    # counts reach the tens of thousands.
    tp, tn, fp, fn = float(tp), float(tn), float(fp), float(fn)

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0

    positives = tp + fn
    sensitivity = tp / positives if positives else 0

    negatives = tn + fp
    specificity = tn / negatives if negatives else 0

    # MCC is undefined when any marginal total is zero; report 0 in that case.
    denominator = positives * negatives * (tp + fp) * (tn + fn)
    if denominator:
        mcc = ((tp * tn) - (fn * fp)) / math.sqrt(denominator)
    else:
        mcc = 0

    return [round(value * 100, 2)
            for value in (accuracy, sensitivity, specificity, mcc)]
#Metrics from confusion matrix
def calc_from_matrix(mat):
    """Derive macro- and micro-style metrics from a multi-class confusion matrix.

    Parameters
    ----------
    mat : 2-D NumPy array
        Square confusion matrix (column slicing ``mat[:, k]`` is used, so a
        plain list of lists is not accepted).  For class ``k`` the diagonal
        entry is its true-positive count, the other entries of row ``k`` are
        counted as false negatives and the other entries of column ``k`` as
        false positives.

    Returns
    -------
    list
        ``[macros, micros]`` where ``macros`` holds the per-class
        ``calculate_metrics`` results averaged over the classes
        (accuracy, sensitivity, specificity, mcc) and ``micros`` is
        ``calculate_metrics`` applied to the class-averaged tp/tn/fp/fn counts.
    """
    n_classes = len(mat)
    grand_total = sum(sum(mat))

    per_class_scores = []   # calculate_metrics output for every class
    per_class_counts = []   # [tp, fn, fp, tn] for every class
    for k in range(n_classes):
        tp = mat[k][k]
        # Everything else in row k was missed for class k ...
        fn = sum(value for j, value in enumerate(mat[k]) if j != k)
        # ... and everything else in column k was wrongly assigned to it.
        fp = sum(value for j, value in enumerate(mat[:, k]) if j != k)
        tn = grand_total - (tp + fp + fn)

        per_class_scores.append(calculate_metrics(tp, tn, fp, fn))
        per_class_counts.append([tp, fn, fp, tn])

    # Macro: average each of the four per-class scores over the classes.
    _fold_macros = [
        np.sum([scores[idx] for scores in per_class_scores]) / len(per_class_scores)
        for idx in range(4)
    ]

    # Micro: average the raw counts first, then score the averaged counts.
    mean_tp, mean_fn, mean_fp, mean_tn = [
        np.sum([counts[idx] for counts in per_class_counts]) / len(per_class_counts)
        for idx in range(4)
    ]
    _fold_micros = calculate_metrics(mean_tp, mean_tn, mean_fp, mean_fn)

    return [_fold_macros, _fold_micros]

#Accuracy
def _accuracy(actual, predicted):
    """Return scikit-learn's ``accuracy_score`` for the two label sequences."""
    score = accuracy_score(actual, predicted)
    return score
        

#Sensitivity(Recall)
def _sens(actual, predicted):
    """Return the weighted-average recall of *predicted* against *actual*.

    Recall is the second element of the tuple returned by
    ``precision_recall_fscore_support``.
    """
    prf_support = precision_recall_fscore_support(
        actual, predicted, average='weighted'
    )
    recall = prf_support[1]
    return recall

#MCC
def _mcc(actual, predicted):
    """Return scikit-learn's Matthews correlation coefficient for the labels."""
    coefficient = matthews_corrcoef(actual, predicted)
    return coefficient

#up-sampling
def _upSample(_dataset, _size):
    
    # Creating the unique class names and then sorting
    _classes=sorted(_dataset['label'].unique())
    # Putting each class in a differnet data frame
    _data_by_class=[_dataset[_dataset['label']==_class] for _class in _classes ]
    
    for ii in range(len(_data_by_class)):
        np.random.seed(2)
        _data_by_class[ii]=_data_by_class[ii].reindex(np.random.permutation(_data_by_class[ii].index))

        if len(_data_by_class[ii])>_size:
            _data_by_class[ii]=_data_by_class[ii][:_size][:]
        else:
            _time = _size // len(_data_by_class[ii])
            _res=[]
            if _time > 1:
                _time=_time+1
            for xx in range(_time):
                _res.append(_data_by_class[ii])
            
            _data_by_class[ii]=pd.concat([item for item in _res], axis=0)
            _data_by_class[ii]=_data_by_class[ii][:_size][:]
                
            
    _folds_by_class_=[]
    _num_of_folds=5
    # Creating folds from each class
    for _item in _data_by_class:
        co=_size//_num_of_folds
        _folds_by_class_.append([_item[(i*co):((i*co)+co)] for i in range(_num_of_folds)])
    
    # Creating Folds from the whole feature
    # by concatenation of each fold from each class
    _folds_=[
        pd.concat([_folds_by_class_[i][j] for i in range(len(_folds_by_class_))]) 
        for j in range(_num_of_folds)
        ]
    
    # Train,Test out of 5 folds
    _train_test_=[]
    for ii in range(_num_of_folds):
        _test__ =_folds_[ii]
        _train__=pd.concat([_folds_[xx] for xx in range(_num_of_folds) if xx!=ii])
        _train_test_.append([_train__,_test__])
    
    return _train_test_
    
    
#Down-sampling data
def _downSample(_dataset, _size):

    # Creating the unique class names and then sorting
    _classes=sorted(_dataset['label'].unique())
    # Putting each class in a differnet data frame
    _data_by_class=[_dataset[_dataset['label']==_class] for _class in _classes ]
    
    _folds_by_class_=[]
    _num_of_folds=5
    # Creating folds from each class
    for _item in _data_by_class:
        np.random.seed(2)
        _item=_item.reindex(np.random.permutation(_item.index))
        co=_size//_num_of_folds
        _folds_by_class_.append([_item[(i*co):((i*co)+co)] for i in range(_num_of_folds)])
    
    # Creating Folds from the whole feature
    # by concatenation of each fold from each class
    _folds_=[
        pd.concat([_folds_by_class_[i][j] for i in range(len(_folds_by_class_))]) 
        for j in range(_num_of_folds)
        ]
    
    # Train,Test out of 5 folds
    _train_test_=[]
    for ii in range(_num_of_folds):
        _test__ =_folds_[ii]
        _train__=pd.concat([_folds_[xx] for xx in range(_num_of_folds) if xx!=ii])
        _train_test_.append([_train__,_test__])
    
    return _train_test_

#Splits Data into [X_train, X_test, y_train, y_test]
def _split_(_train,_test):
    
    y_test = _test['label'].values
    del _test['label']
    X_test = _test.values
    
    y_train = _train['label'].values
    del _train['label']
    X_train = _train.values

    
    return [X_train, X_test, y_train, y_test]

#Prediction
def _predict(_train_test,_g,_c,_type):
    
    X_train, X_test, y_train, y_test = _train_test
    
    _gamma,_C =_g,_c
    
    classifier = None
    if _type=='svc':
        classifier=SVC(kernel='rbf', gamma=_gamma,C=_C, decision_function_shape='ovr')    
    elif _type=='onevsall':
        classifier = OneVsRestClassifier( SVC(kernel='rbf', gamma=_gamma,C=_C) )
        

    #Training the algorithm on training data
    classifier.fit(X_train, y_train)

    # Predicting the Test set results
    y_pred = classifier.predict(X_test)

    return [y_test.copy(),y_pred.copy()]

#Compute from confusion matrix
def computeConfusionMatrix(_res):
    """Average per-fold metrics and print them with their distance to the reference.

    Parameters
    ----------
    _res : list
        One entry per fold; each entry is ``[macros, micros]`` where both are
        4-element sequences ``[accuracy, sensitivity, specificity, mcc]`` in
        percent (they are divided by 100 before being passed to ``distance``).

    Returns
    -------
    None
        The averaged macro and micro metrics and their ``distance`` values
        are printed.
    """
    fold_count = len(_res)

    def _column_mean(section, column):
        # Mean over the folds of one metric of one section (0=macro, 1=micro).
        return np.sum([fold[section][column] for fold in _res]) / fold_count

    def _summarise(section):
        # Averaged metrics of a section plus their distance to the reference.
        means = [_column_mean(section, column) for column in range(4)]
        gap = distance({
            'accuracy': (means[0] / 100),
            'sensitivity': (means[1] / 100),
            'specificity': (means[2] / 100),
            'mcc': means[3] / 100
        })
        return means, gap

    _problem_macros, _mac_distance = _summarise(0)
    _problem_micros, _mic_distance = _summarise(1)

    print("MACRO========================")
    print(_problem_macros)
    print(_mac_distance)

    print("MICRO========================")
    print(_problem_micros)
    print(_mic_distance)

### Main Program

In [None]:
# 5-fold splitter built with KFold's default settings.
# NOTE(review): the evaluation cells below each re-create `kf` themselves,
# so this assignment looks redundant - confirm before removing.
kf = KFold(n_splits=5)

In [None]:
# Load the table used for the 7-class experiments from dataset/aac7.csv
# (relative to the working directory); it must provide a 'label' column.
data7=pd.read_csv(os.path.join('dataset','aac7.csv'))

In [None]:
# Replace the raw class labels in data7 with the integer codes produced by
# LabelEncoder; `le` is kept so the codes can be mapped back later.
le = LabelEncoder()
y = le.fit_transform(data7['label'].values)
data7['label'] = y

### Results (7 classes)

#### On Sorted

In [None]:
# 5-fold cross-validation over data7 in its stored row order (KFold does not
# shuffle by default), scoring both classifier variants on every fold.
kf = KFold(n_splits=5)
_res_svc = []
_res_onvsall = []
for train, test in kf.split(data7):

    df_train = data7.iloc[train]
    df_test = data7.iloc[test]

    # Both variants use gamma=0.02, C=4.6.  Each call gets fresh copies of the
    # fold frames because they are handed to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(df_train.copy(), df_test.copy()), 0.02, 4.6, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)

#### On Shuffled Dataset

In [None]:
from sklearn.metrics import confusion_matrix

# Shuffle the rows of data7 once with a fixed seed, then run the same 5-fold
# evaluation as on the unshuffled data.
np.random.seed(2)
data7s = data7.reindex(np.random.permutation(data7.index))

kf = KFold(n_splits=5)
_res_svc = []
_res_onvsall = []

for train, test in kf.split(data7s):

    df_train = data7s.iloc[train]
    df_test = data7s.iloc[test]

    # Both variants use gamma=0.02, C=4.6.  Each call gets fresh copies of the
    # fold frames because they are handed to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(df_train.copy(), df_test.copy()), 0.02, 4.6, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)

#### On Down Sampled Dataset

In [None]:
# Evaluate on the class-balanced splits produced by _downSample
# (60 rows requested per class), scoring both classifier variants per split.
_train_test__ = _downSample(data7.copy(), 60)
_res_svc = []
_res_onvsall = []
for train, test in _train_test__:

    # Both variants use gamma=0.02, C=4.6.  Each call gets fresh copies of the
    # split frames because they are handed to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(train.copy(), test.copy()), 0.02, 4.6, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)

### 8-class

In [None]:
# 5-fold splitter built with KFold's default settings.
# NOTE(review): the 8-class evaluation cells below each re-create `kf`
# themselves, so this assignment looks redundant - confirm before removing.
kf = KFold(n_splits=5)

In [None]:
# Load the table used for the 8-class experiments from dataset/aac8.csv
# (relative to the working directory); it must provide a 'label' column.
data8=pd.read_csv(os.path.join('dataset','aac8.csv'))

In [None]:
# Replace the raw class labels in data8 with the integer codes produced by
# LabelEncoder; `le` is kept so the codes can be mapped back later.
le = LabelEncoder()
y = le.fit_transform(data8['label'].values)
data8['label'] = y

### Results 8-classes

#### On Sorted

In [None]:
# 5-fold cross-validation over data8 in its stored row order (KFold does not
# shuffle by default), scoring both classifier variants on every fold.
kf = KFold(n_splits=5)
_res_svc = []
_res_onvsall = []
for train, test in kf.split(data8):

    df_train = data8.iloc[train]
    df_test = data8.iloc[test]

    # Both variants use gamma=0.02, C=4.1.  Each call gets fresh copies of the
    # fold frames because they are handed to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(df_train.copy(), df_test.copy()), 0.02, 4.1, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)

#### On Shuffled

In [None]:
from sklearn.metrics import confusion_matrix

# Shuffle the rows of data8 once with a fixed seed, then run the same 5-fold
# evaluation as on the unshuffled data.
np.random.seed(2)
data8s = data8.reindex(np.random.permutation(data8.index))

kf = KFold(n_splits=5)
_res_svc = []
_res_onvsall = []

for train, test in kf.split(data8s):

    df_train = data8s.iloc[train]
    df_test = data8s.iloc[test]

    # Both variants use gamma=0.02, C=4.1.  Each call gets fresh copies of the
    # fold frames because they are handed to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(df_train.copy(), df_test.copy()), 0.02, 4.1, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)

In [None]:
# 8-class evaluation on the class-balanced splits produced by _downSample
# (60 rows requested per class), scoring both classifier variants per split.
#
# C is 4.1 to match the other 8-class cells.  This cell previously used 4.6,
# the 7-class value, apparently carried over when the 7-class down-sampling
# cell was copied.
_gamma8, _C8 = 0.02, 4.1

_train_test__ = _downSample(data8.copy(), 60)
_res_svc = []
_res_onvsall = []
for train, test in _train_test__:

    # Each call gets fresh copies of the split frames because they are handed
    # to _split_, which may modify them.
    for _kind, _bucket in (('svc', _res_svc), ('onevsall', _res_onvsall)):
        y7_test, y7_pred = _predict(
            _split_(train.copy(), test.copy()), _gamma8, _C8, _kind
        )
        _mat = confusion_matrix(y7_test, y7_pred)
        _bucket.append(calc_from_matrix(_mat))

# Plain SVC
print("SVC results =========")
computeConfusionMatrix(_res_svc)
# SVC wrapped in OneVsRestClassifier
print("ONE vs All results =========")
computeConfusionMatrix(_res_onvsall)