In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: M Arshad Zahangir Chowdhury

SVM cross-validations based on pressure and concentrations

"""

%matplotlib inline 

import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
from ipywidgets import interactive
import seaborn as sns  #heat map
import glob # batch processing of images

if '../../' not in sys.path:
    sys.path.append('../../')

from src.spectral_datasets.IR_datasets import IR_data


from src.misc.utils import *

import datetime

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold


from sklearn import svm
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier 

from scipy import interpolate
from sys import getsizeof

# Relative path (from this notebook's folder) to the experimental IR data directory.
path_exp = "../../data/IR_Experimental_Data/"

### NOTE: an UnboundLocalError occurs without the path to the cross-validation data
### (presumably raised while the dataset is being loaded -- TODO confirm where).

In [None]:
# Build the IR dataset object configured for the pressure cross-validation,
# load its spectra and show the dataset report.
s = IR_data(
    data_start=400,
    data_end=4000,
    resolution=1,
    verbosity=False,
    cv_type='pressure',
)
s.load_IR_data()
s.dataset_info()

In [None]:
# Expose the dataset's arrays and bookkeeping values as notebook-level names
# for the cross-validation cells that follow.
X, y = s.spectra, s.targets
labels = s.labels
n_compounds, n_spectrum = s.n_compounds, s.n_spectrum
n_spectra = n_compounds * n_spectrum  # total number of spectra across all compounds
samplesize = s.samplesize
wavenumbers = s.frequencies

print('shape of features:', X.shape)
print('shape of labels:', y.shape)

## Pressure Cross-validation

In [None]:
# Create the stratified k-fold splitter and list the test indices of every fold.

totalnumber_folds = 10

# Lift pandas' display limits so the index tables below are printed in full.
pd.set_option("display.max_rows", None, "display.max_columns", None)

skf = StratifiedKFold(n_splits=totalnumber_folds, shuffle=False, random_state=None)
skf.get_n_splits(X, y)

# Start from empty per-fold accumulators (one independent float array each).
(Fold_number, Fold_Times, Fold_Class_Acc,
 Fold_Class_Precision, Fold_Class_Recall, Fold_Class_F1) = (np.array([]) for _ in range(6))

# Walk through the folds, numbering them from 1.
foldcounter = 0
for foldcounter, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    print("Fold number: ", foldcounter)
    print("\n")

    train_index_DF, test_index_DF = pd.DataFrame(train_index), pd.DataFrame(test_index)

    print('----\n')
    print('TEST INDICES')
    print(test_index_DF)

    # Materialise the split; after the loop these names hold the last fold.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    


In [None]:
# Stratified OVR (SVM-Linear) -- pressure cross-validation
#
# For every fold produced by `skf`: fit a one-vs-rest linear SVM, predict the
# held-out spectra, save the confusion-matrix heatmap and record timing,
# accuracy and class-averaged precision / recall / F1. The per-fold values end
# up in `kfoldResults_OVR_SVM` (one row per fold).

FIGURE_DIR = 'KFOLDFIGURES'
# savefig does not create missing folders; make sure the target exists.
os.makedirs(FIGURE_DIR, exist_ok=True)

# Class ids are taken to be the integers 0 .. n_compounds-1 (the targets were
# previously addressed with the hard-coded ids 0..33). Deriving them from
# n_compounds keeps the averages correct if the number of compounds changes.
class_ids = np.arange(n_compounds)

fold_numbers, fold_times = [], []
fold_acc, fold_precision, fold_recall, fold_f1 = [], [], [], []

foldcounter = 0
for foldcounter, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    print('Fold ', foldcounter)

    # OneVsRest (SVM-Linear): time only fitting + prediction, not the plotting.
    t_start = datetime.datetime.now()
    classifier_OVR = OneVsRestClassifier(
        SVC(kernel='linear', C=500, decision_function_shape='ovo', random_state=1)
    ).fit(X_train, y_train)
    pred_y = classifier_OVR.predict(X_test)
    t_end = datetime.datetime.now()
    Time_OVR = (t_end - t_start).total_seconds() * 1000  # milliseconds

    # Fix the label set so the matrix is always n_compounds x n_compounds and
    # lines up with the tick labels even if a class is missing from a fold.
    cm_OVR = confusion_matrix(y_test, pred_y, labels=class_ids)

    fig, ax = plt.subplots(figsize=(16, 10))
    # Passing the tick labels to heatmap() shows every label and avoids a
    # tick-count mismatch; assumes `labels` holds one name per class id.
    sns.heatmap(cm_OVR, linewidths=2, annot=True, cmap='RdPu',
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_title('OVR(Support Vector Machine-Linear) \nIteration ' + str(foldcounter))
    ax.set_xlabel('Predicted Molecule')
    ax.set_ylabel('Actual Molecule')
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.setp(ax.get_yticklabels(), rotation=0)

    fig.savefig(os.path.join(FIGURE_DIR, 'CM_OVR_SVM-Linear_P' + str(foldcounter) + '.png'),
                bbox_inches='tight')
    plt.close(fig)  # figures are only needed on disk; free the memory

    fold_numbers.append(foldcounter)
    fold_times.append(Time_OVR)
    fold_acc.append(accuracy_score(y_test, pred_y))
    # average='macro' over class_ids is the unweighted mean of the per-class
    # scores, i.e. the sum of single-class scores divided by n_compounds.
    fold_precision.append(precision_score(y_test, pred_y, labels=class_ids, average='macro'))
    fold_recall.append(recall_score(y_test, pred_y, labels=class_ids, average='macro'))
    fold_f1.append(f1_score(y_test, pred_y, labels=class_ids, average='macro'))

# Keep the per-fold results available as float arrays under their usual names.
Fold_number = np.array(fold_numbers, dtype=float)
Fold_Times = np.array(fold_times, dtype=float)
Fold_Class_Acc = np.array(fold_acc, dtype=float)
Fold_Class_Precision = np.array(fold_precision, dtype=float)
Fold_Class_Recall = np.array(fold_recall, dtype=float)
Fold_Class_F1 = np.array(fold_f1, dtype=float)

kfoldResults_OVR_SVM = pd.DataFrame({'Method': 'OVR(SVM-Linear)',
                                     'Fold number': Fold_number,
                                     'Time': Fold_Times,
                                     'Accuracy': Fold_Class_Acc,
                                     'Average Precision': Fold_Class_Precision,
                                     'Average Recall': Fold_Class_Recall,
                                     'Average F1 score': Fold_Class_F1
                                     })

# One point per fold: class-averaged recall against class-averaged precision.
fig, ax = plt.subplots()
ax.scatter(Fold_Class_Recall, Fold_Class_Precision)
ax.set_xlabel('Average Recall')
ax.set_ylabel('Average Precision');


In [None]:
# Collect the per-method fold results into a single table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. Further methods can be added to the list.
kfoldResults = pd.concat([kfoldResults_OVR_SVM], ignore_index=True)

# Uncomment to persist the pressure cross-validation results:
# kfoldResults.to_csv('kfoldResults_P' +'.csv', index=False)
kfoldResults

In [None]:
# Build the IR dataset object configured for the concentration
# cross-validation, load its spectra and show the dataset report.
s = IR_data(
    data_start=400,
    data_end=4000,
    resolution=1,
    verbosity=False,
    cv_type='concentration',
)
s.load_IR_data()
s.dataset_info()

In [None]:
# Expose the dataset's arrays and bookkeeping values as notebook-level names
# for the cross-validation cells that follow.
X, y = s.spectra, s.targets
labels = s.labels
n_compounds, n_spectrum = s.n_compounds, s.n_spectrum
n_spectra = n_compounds * n_spectrum  # total number of spectra across all compounds
samplesize = s.samplesize
wavenumbers = s.frequencies

print('shape of features:', X.shape)
print('shape of labels:', y.shape)

In [None]:


# Create the stratified k-fold splitter and list the test indices of every fold.

totalnumber_folds = 7

# Lift pandas' display limits so the index tables below are printed in full.
pd.set_option("display.max_rows", None, "display.max_columns", None)

skf = StratifiedKFold(n_splits=totalnumber_folds, shuffle=False, random_state=None)
skf.get_n_splits(X, y)

# Start from empty per-fold accumulators (one independent float array each).
(Fold_number, Fold_Times, Fold_Class_Acc,
 Fold_Class_Precision, Fold_Class_Recall, Fold_Class_F1) = (np.array([]) for _ in range(6))

# Walk through the folds, numbering them from 1.
foldcounter = 0
for foldcounter, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    print("Fold number: ", foldcounter)
    print("\n")

    train_index_DF, test_index_DF = pd.DataFrame(train_index), pd.DataFrame(test_index)

    print('----\n')
    print('TEST INDICES')
    print(test_index_DF)

    # Materialise the split; after the loop these names hold the last fold.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    


In [None]:
# Stratified OVR (SVM-Linear) -- concentration cross-validation
#
# For every fold produced by `skf`: fit a one-vs-rest linear SVM, predict the
# held-out spectra, save the confusion-matrix heatmap and record timing,
# accuracy and class-averaged precision / recall / F1. The per-fold values end
# up in `kfoldResults_OVR_SVM` (one row per fold).

FIGURE_DIR = 'KFOLDFIGURES'
# savefig does not create missing folders; make sure the target exists.
os.makedirs(FIGURE_DIR, exist_ok=True)

# Class ids are taken to be the integers 0 .. n_compounds-1 (the targets were
# previously addressed with the hard-coded ids 0..33). Deriving them from
# n_compounds keeps the averages correct if the number of compounds changes.
class_ids = np.arange(n_compounds)

fold_numbers, fold_times = [], []
fold_acc, fold_precision, fold_recall, fold_f1 = [], [], [], []

foldcounter = 0
for foldcounter, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    print('Fold ', foldcounter)

    # OneVsRest (SVM-Linear): time only fitting + prediction, not the plotting.
    t_start = datetime.datetime.now()
    classifier_OVR = OneVsRestClassifier(
        SVC(kernel='linear', C=500, decision_function_shape='ovo', random_state=1)
    ).fit(X_train, y_train)
    pred_y = classifier_OVR.predict(X_test)
    t_end = datetime.datetime.now()
    Time_OVR = (t_end - t_start).total_seconds() * 1000  # milliseconds

    # Fix the label set so the matrix is always n_compounds x n_compounds and
    # lines up with the tick labels even if a class is missing from a fold.
    cm_OVR = confusion_matrix(y_test, pred_y, labels=class_ids)

    fig, ax = plt.subplots(figsize=(16, 10))
    # Passing the tick labels to heatmap() shows every label and avoids a
    # tick-count mismatch; assumes `labels` holds one name per class id.
    sns.heatmap(cm_OVR, linewidths=2, annot=True, cmap='RdPu',
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_title('OVR(Support Vector Machine-Linear) \nIteration ' + str(foldcounter))
    ax.set_xlabel('Predicted Molecule')
    ax.set_ylabel('Actual Molecule')
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.setp(ax.get_yticklabels(), rotation=0)

    fig.savefig(os.path.join(FIGURE_DIR, 'CM_OVR_SVM-Linear_X' + str(foldcounter) + '.png'),
                bbox_inches='tight')
    plt.close(fig)  # figures are only needed on disk; free the memory

    fold_numbers.append(foldcounter)
    fold_times.append(Time_OVR)
    fold_acc.append(accuracy_score(y_test, pred_y))
    # average='macro' over class_ids is the unweighted mean of the per-class
    # scores, i.e. the sum of single-class scores divided by n_compounds.
    fold_precision.append(precision_score(y_test, pred_y, labels=class_ids, average='macro'))
    fold_recall.append(recall_score(y_test, pred_y, labels=class_ids, average='macro'))
    fold_f1.append(f1_score(y_test, pred_y, labels=class_ids, average='macro'))

# Keep the per-fold results available as float arrays under their usual names.
Fold_number = np.array(fold_numbers, dtype=float)
Fold_Times = np.array(fold_times, dtype=float)
Fold_Class_Acc = np.array(fold_acc, dtype=float)
Fold_Class_Precision = np.array(fold_precision, dtype=float)
Fold_Class_Recall = np.array(fold_recall, dtype=float)
Fold_Class_F1 = np.array(fold_f1, dtype=float)

kfoldResults_OVR_SVM = pd.DataFrame({'Method': 'OVR(SVM-Linear)',
                                     'Fold number': Fold_number,
                                     'Time': Fold_Times,
                                     'Accuracy': Fold_Class_Acc,
                                     'Average Precision': Fold_Class_Precision,
                                     'Average Recall': Fold_Class_Recall,
                                     'Average F1 score': Fold_Class_F1
                                     })

# One point per fold: class-averaged recall against class-averaged precision.
fig, ax = plt.subplots()
ax.scatter(Fold_Class_Recall, Fold_Class_Precision)
ax.set_xlabel('Average Recall')
ax.set_ylabel('Average Precision');


In [None]:
# Collect the per-method fold results into a single table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. Further methods can be added to the list.
kfoldResults = pd.concat([kfoldResults_OVR_SVM], ignore_index=True)

# Uncomment to persist the concentration cross-validation results:
# kfoldResults.to_csv('kfoldResults_X' +'.csv', index=False)
kfoldResults