### Imports

In [31]:
import os
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef
from sklearn.preprocessing import LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

### Functions

In [55]:
#Accuracy
def _accuracy(actual, predicted):
    """Return scikit-learn's ``accuracy_score`` for the given label sequences.

    ``actual`` holds the reference labels, ``predicted`` the classifier output.
    """
    score = accuracy_score(actual, predicted)
    return score
        

#Sensitivity(Recall)
def _sens(actual, predicted):
    """Return the recall entry of ``precision_recall_fscore_support``.

    The metric is computed with ``average='weighted'`` and only the second
    element of the returned tuple (recall) is handed back.

    NOTE(review): in the outputs recorded in this notebook ``_sens`` is always
    equal to ``_acc`` -- confirm that weighted averaging is what was intended.
    """
    _precision, recall, _fscore, _support = precision_recall_fscore_support(
        actual, predicted, average='weighted'
    )
    return recall

#MCC
def _mcc(actual, predicted):
    """Return ``matthews_corrcoef`` of the reference and predicted labels."""
    coefficient = matthews_corrcoef(actual, predicted)
    return coefficient

#up-sampling
def _upSample(_dataset, _size, _num_of_folds=5):
    """Balance every class to ``_size`` rows and build train/test splits.

    Each class (distinct value of the ``'label'`` column) is shuffled with a
    fixed seed, then either truncated to ``_size`` rows or replicated until it
    has at least ``_size`` rows and truncated. The balanced classes are cut
    into ``_num_of_folds`` equally sized folds; each fold serves once as the
    test set while the remaining folds form the training set.

    Parameters
    ----------
    _dataset : pandas.DataFrame
        Data with a ``'label'`` column. Shuffling goes through ``reindex``,
        so the index labels are presumably expected to be unique.
    _size : int
        Number of rows every class should contribute.
    _num_of_folds : int, optional
        Number of folds (default 5). Each class contributes
        ``_size // _num_of_folds`` rows per fold; any remainder is dropped.

    Returns
    -------
    list
        ``_num_of_folds`` items of the form ``[train_frame, test_frame]``.

    Raises
    ------
    ValueError
        If ``_num_of_folds`` is smaller than 2.

    Notes
    -----
    ``np.random.seed(2)`` is called once per class, so the global NumPy
    random state is reset as a side effect.

    NOTE(review): classes are replicated *before* the folds are cut, so copies
    of the same row can land in both a training and a test fold.
    """
    if _num_of_folds < 2:
        raise ValueError("_num_of_folds must be at least 2")

    # Creating the unique class names and then sorting
    _classes = sorted(_dataset['label'].unique())

    _data_by_class = []
    for _class in _classes:
        _rows = _dataset[_dataset['label'] == _class]

        # Same seed for every class keeps the shuffle reproducible.
        np.random.seed(2)
        _rows = _rows.reindex(np.random.permutation(_rows.index))

        if len(_rows) < _size:
            # Ceiling division: enough whole copies to reach _size rows.
            # (The previous floor-based count left classes undersized
            # whenever _size // len(_rows) == 1.)
            _copies = -(-_size // len(_rows))
            _rows = pd.concat([_rows] * _copies, axis=0)

        _data_by_class.append(_rows.iloc[:_size])

    # Rows each class contributes to a single fold
    co = _size // _num_of_folds

    # Fold j is the concatenation of the j-th slice of every class
    _folds_ = [
        pd.concat([_rows.iloc[j * co:(j + 1) * co] for _rows in _data_by_class])
        for j in range(_num_of_folds)
    ]

    # Each fold is the test set once; the others are merged into the train set
    _train_test_ = []
    for ii in range(_num_of_folds):
        _train__ = pd.concat(
            [_fold for xx, _fold in enumerate(_folds_) if xx != ii]
        )
        _train_test_.append([_train__, _folds_[ii]])

    return _train_test_
    
    
#down-sampling data
def _downSample(_dataset, _size):
    """Build 5 train/test splits using ``_size`` rows from every class.

    Rows are grouped by the ``'label'`` column (classes in sorted order). Each
    group is shuffled after ``np.random.seed(2)`` -- which also resets the
    global NumPy random state -- and its first ``_size`` rows (rounded down to
    a multiple of 5) are cut into 5 consecutive slices. Fold ``k`` is the
    concatenation of slice ``k`` of every class.

    Returns a list of 5 ``[train_frame, test_frame]`` items, where the test
    frame is one fold and the train frame is the other four concatenated.
    """
    _num_of_folds = 5
    labels = sorted(_dataset['label'].unique())
    groups = [_dataset[_dataset['label'] == label] for label in labels]

    # One list of 5 slices per class
    per_class_slices = []
    for group in groups:
        np.random.seed(2)
        shuffled = group.reindex(np.random.permutation(group.index))
        fold_len = _size // _num_of_folds
        slices = []
        for k in range(_num_of_folds):
            start = k * fold_len
            slices.append(shuffled[start:start + fold_len])
        per_class_slices.append(slices)

    # Fold k gathers the k-th slice of every class
    folds = []
    for k in range(_num_of_folds):
        folds.append(pd.concat([slices[k] for slices in per_class_slices]))

    # Hold each fold out once as the test set
    splits = []
    for held_out in range(_num_of_folds):
        remaining = [fold for k, fold in enumerate(folds) if k != held_out]
        splits.append([pd.concat(remaining), folds[held_out]])

    return splits

#Splits Data into [X_train, X_test, y_train, y_test]
def _split_(_train, _test):
    """Separate features and labels of a train/test pair of DataFrames.

    Parameters
    ----------
    _train, _test : pandas.DataFrame
        Frames containing a ``'label'`` column; every other column is treated
        as a feature.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` where the ``X_*`` entries are
        the feature values and the ``y_*`` entries the ``'label'`` values.

    Raises
    ------
    KeyError
        If either frame has no ``'label'`` column.

    The input frames are left untouched: the label column is dropped on a
    derived frame instead of being deleted in place, so callers no longer
    need to pass defensive copies.
    """
    y_test = _test['label'].values
    X_test = _test.drop('label', axis=1).values

    y_train = _train['label'].values
    X_train = _train.drop('label', axis=1).values

    return [X_train, X_test, y_train, y_test]

#Prediction
def _predict(_train_test, _g, _c):
    """Fit an RBF-kernel SVC on the training data and predict the test data.

    ``_train_test`` is unpacked as ``X_train, X_test, y_train, y_test``;
    ``_g`` is passed to the classifier as ``gamma`` and ``_c`` as ``C``.

    Returns ``[y_test, y_pred]``, both as copies of the underlying arrays.
    """
    X_train, X_test, y_train, y_test = _train_test

    model = SVC(kernel='rbf', gamma=_g, C=_c, decision_function_shape='ovr')

    # Train on the training split, then label the held-out split
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    return [y_test.copy(), predictions.copy()]

### Main Program

In [5]:
# 5-fold cross-validation splitter; the result cells below re-create it before use.
kf = KFold(n_splits=5)

In [44]:
# Load dataset/aac7.csv; the cells below read its 'label' column as the class target.
data7=pd.read_csv(os.path.join('dataset','aac7.csv'))

In [7]:
# Encode the class labels as integers and store them back in the frame
le = LabelEncoder()
y = le.fit_transform(data7['label'].values)
data7['label'] = y

### Results

#### No Shuffling

In [11]:
# 5-fold cross-validation on the rows in their stored order
kf = KFold(n_splits=5)
res = []
for train, test in kf.split(data7):
    df_train = pd.DataFrame(data7.iloc[train])
    df_test = pd.DataFrame(data7.iloc[test])

    # Copies go to _split_ so df_train / df_test themselves stay intact
    y7_test, y7_pred = _predict(
        _split_(df_train.copy(), df_test.copy()), 0.02, 5.0
    )

    res.append([
        _accuracy(y7_test, y7_pred),
        _mcc(y7_test, y7_pred),
        _sens(y7_test, y7_pred),
    ])

# Mean accuracy, MCC and sensitivity over the 5 folds
print('_acc = ', sum(_fold[0] for _fold in res) / 5)
print('_mcc = ', sum(_fold[1] for _fold in res) / 5)
print('_sens = ', sum(_fold[2] for _fold in res) / 5)

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  'recall', 'true', average, warn_for)


_acc =  0.1833333333333333
_mcc =  0.030775864561116317
_sens =  0.1833333333333333


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


#### On Shuffled Dataset

In [15]:
# Shuffle the rows once with a fixed seed, then run 5-fold cross-validation
np.random.seed(2)
data7s = data7.copy()
data7s = data7s.reindex(np.random.permutation(data7s.index))

kf = KFold(n_splits=5)
_res = []
for train, test in kf.split(data7s):
    df_train = pd.DataFrame(data7s.iloc[train])
    df_test = pd.DataFrame(data7s.iloc[test])

    # Copies go to _split_ so df_train / df_test themselves stay intact
    y7_test, y7_pred = _predict(
        _split_(df_train.copy(), df_test.copy()), 0.02, 4.1
    )

    _res.append([
        _accuracy(y7_test, y7_pred),
        _mcc(y7_test, y7_pred),
        _sens(y7_test, y7_pred),
    ])

# Mean accuracy, MCC and sensitivity over the 5 folds
print('_acc = ', sum(_fold[0] for _fold in _res) / 5)
print('_mcc = ', sum(_fold[1] for _fold in _res) / 5)
print('_sens = ', sum(_fold[2] for _fold in _res) / 5)

_acc =  0.44871794871794873
_mcc =  0.2828579969464605
_sens =  0.44871794871794873


#### On Down Sampled Dataset

In [73]:
# Evaluate on the class-balanced splits produced by _downSample (60 rows per class)
_train_test__ = _downSample(data7.copy(), 60)
_res__ = []
for train, test in _train_test__:
    # Copies go to _split_ so the frames held in _train_test__ stay intact
    y7_test__, y7_pred__ = _predict(
        _split_(train.copy(), test.copy()), 0.03, 4.5
    )

    _res__.append([
        _accuracy(y7_test__, y7_pred__),
        _mcc(y7_test__, y7_pred__),
        _sens(y7_test__, y7_pred__),
    ])

# Mean accuracy, MCC and sensitivity over the 5 splits
print('_acc = ', sum(_fold[0] for _fold in _res__) / 5)
print('_mcc = ', sum(_fold[1] for _fold in _res__) / 5)
print('_sens = ', sum(_fold[2] for _fold in _res__) / 5)

_acc =  0.4523809523809524
_mcc =  0.36410513166292036
_sens =  0.4523809523809524
