In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: M Arshad Zahangir Chowdhury

SVM implementations, cross-validations, 70%-30% train-test and experimental recognition in IR range.

"""

%matplotlib inline 

import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
from ipywidgets import interactive
import seaborn as sns  
import glob 

# Put '../../' on the module search path (once) so that the `src.*` imports
# that follow can be resolved.
# NOTE(review): this is relative to the current working directory, so it
# presumably expects the notebook to run two levels below the repo root - confirm.
if '../../' not in sys.path:
    sys.path.append('../../')

from src.spectral_datasets.IR_datasets import IR_data


from src.misc.utils import *

import datetime

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold


from sklearn import svm
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier 

from scipy import interpolate
from sys import getsizeof

# Relative path of the experimental IR data directory.
# Not referenced by any of the cells visible in this notebook.
path_exp = "../../data/IR_Experimental_Data/"

## Load IR spectra and view dataset attributes

In [None]:
# Tunable frequency range: edit data_start / data_end to change the window
# that is loaded.
ir_settings = dict(data_start=400, data_end=4000, resolution=1, verbosity=True)

s = IR_data(**ir_settings)
s.load_IR_data()   # load the spectra into `s`
s.dataset_info()   # show the dataset summary


In [None]:
# Expose the loaded dataset's attributes under the short names used by the
# cells below.
X, y = s.spectra, s.targets
labels = s.labels

n_compounds, n_spectrum = s.n_compounds, s.n_spectrum
n_spectra = n_compounds * n_spectrum   # total count used to build row indices later

samplesize, wavenumbers = s.samplesize, s.frequencies

# Quick sanity check on the feature matrix and the target vector.
print('shape of features:', X.shape)
print('shape of labels:', y.shape)

## 70%-30% train-test split for SVM classifiers with linear and RBF kernels

In [None]:
# Fraction of the spectra used for training; the rest is held out for testing.
TRAIN_SIZE = 0.70
# Kept for reference and for any cell that still reads it. Note that in floating
# point this evaluates to 0.30000000000000004, not 0.3.
TEST_SIZE = 1 - TRAIN_SIZE

# Only train_size is passed: scikit-learn then sets the test set to the exact
# complement. Passing the inexact float TEST_SIZE as well can make the
# rounded train and test counts add up to more than the number of samples
# for some dataset sizes, which raises a ValueError.
# stratify=y keeps the class proportions the same in both subsets.
train_X, test_X, train_y, test_y = train_test_split(
    X, y,
    train_size=TRAIN_SIZE,
    random_state=123,
    stratify=y,
)

# Per-class share (in percent) of the full, training and testing sets; the
# three rows should be nearly identical because the split is stratified.
print("All:", np.bincount(y) / float(len(y))*100  )
print("Training:", np.bincount(train_y) / float(len(train_y))*100  )
print("Testing:", np.bincount(test_y) / float(len(test_y))*100  )


In [None]:
# Redo the split while carrying a row-index array along, so each training and
# testing sample can be mapped back to its row in X. The random_state and
# stratification match the preceding split cell; train_X, test_X, train_y and
# test_y are overwritten here.
indices = np.arange(n_spectra)

# Only train_size is passed: scikit-learn sets the test set to its complement.
# Also passing 1 - TRAIN_SIZE (0.30000000000000004 in floating point) can make
# the rounded train and test counts exceed the number of samples and raise a
# ValueError for some dataset sizes.
train_X, test_X, train_y, test_y, train_indices, test_indices = train_test_split(
    X, y, indices,
    train_size=TRAIN_SIZE,
    random_state=123,
    stratify=y,
)


In [None]:
# One-vs-rest SVM with a linear kernel: fit on the training split, evaluate on
# the testing split, plot the confusion matrix and print a per-class report.

# Measure time elapsed. The interval covers fitting, prediction, plotting and
# the classification report, not the fit alone.
t_start = datetime.datetime.now()

# OneVsRestClassifier trains one binary SVC per class. SVC documents
# decision_function_shape as ignored for binary problems, so it is not set.
classifier_OVR = OneVsRestClassifier(
    SVC(kernel='linear', C=500, random_state=1)
).fit(train_X, train_y)

pred_y = classifier_OVR.predict(test_X)

# Fraction of test spectra whose predicted class equals the true class.
FCA_OVR = np.sum(pred_y == test_y) / float(len(test_y))
print("Fraction Correct[Accuracy]:", FCA_OVR)

cm_OVR = confusion_matrix(test_y, pred_y)
plt.rc('font', weight='bold')
fig = plt.figure(figsize=(16,10));
# Tick labels are handed to heatmap directly so every row/column is labelled
# at the right position even when seaborn would otherwise thin out the ticks.
# fmt='d' prints the integer counts as-is instead of the default '.2g', which
# switches to scientific notation for counts of 100 or more.
ax = sns.heatmap(cm_OVR, linewidths=2, annot=True, fmt='d', cmap='RdPu',
                 xticklabels=list(labels), yticklabels=list(labels));

plt.xlabel('Predicted Molecule',fontsize='medium', fontweight='bold');
plt.ylabel('Actual Molecule',fontsize='medium', fontweight='bold');
plt.xticks(rotation=90);
plt.yticks(rotation=0);
plt.title('Linear Kernel, C = 500, Accuracy={0:0.2f}%\n'.format(FCA_OVR*100), fontsize='medium', fontweight='bold');
# fig.savefig('RESULTS/Linear Kernel.png', bbox_inches='tight',dpi=300)


print(classification_report(test_y, pred_y))

t_end = datetime.datetime.now()
delta = t_end - t_start
Time_OVR=delta.total_seconds() * 1000

print('Time elapsed: ', Time_OVR) # milliseconds

In [None]:
# One-vs-rest SVM with an RBF kernel: fit on the training split, evaluate on
# the testing split, plot the confusion matrix and print a per-class report.
# Overwrites classifier_OVR, pred_y, FCA_OVR, cm_OVR and Time_OVR from the
# linear-kernel cell.

# Measure time elapsed. The interval covers fitting, prediction, plotting and
# the classification report, not the fit alone.
# (datetime is already imported in the first cell of the notebook.)
t_start = datetime.datetime.now()

# OneVsRestClassifier trains one binary SVC per class. SVC documents
# decision_function_shape as ignored for binary problems, so it is not set.
classifier_OVR = OneVsRestClassifier(
    SVC(kernel='rbf', C=500, random_state=1)
).fit(train_X, train_y)

pred_y = classifier_OVR.predict(test_X)

# Fraction of test spectra whose predicted class equals the true class.
FCA_OVR = np.sum(pred_y == test_y) / float(len(test_y))
print("Fraction Correct[Accuracy]:", FCA_OVR)

cm_OVR = confusion_matrix(test_y, pred_y)
plt.rc('font', weight='bold')
fig = plt.figure(figsize=(16,10));

# Tick labels are handed to heatmap directly so every row/column is labelled
# at the right position even when seaborn would otherwise thin out the ticks.
# fmt='d' prints the integer counts as-is instead of the default '.2g', which
# switches to scientific notation for counts of 100 or more.
ax = sns.heatmap(cm_OVR, linewidths=2, annot=True, fmt='d', cmap='Greens',
                 xticklabels=list(labels), yticklabels=list(labels));

plt.xlabel('Predicted Molecule',fontsize='medium', fontweight='bold');
plt.ylabel('Actual Molecule',fontsize='medium', fontweight='bold');
plt.xticks(rotation=90);
plt.yticks(rotation=0);
plt.title('Radial Basis Function Kernel, C = 500, Accuracy={0:0.2f}%\n'.format(FCA_OVR*100), fontsize='medium', fontweight='bold');
# fig.savefig('RESULTS/RBF Kernel.png', bbox_inches='tight',dpi=300)

print(classification_report(test_y, pred_y))

t_end = datetime.datetime.now()
delta = t_end - t_start
Time_OVR=delta.total_seconds() * 1000

print('Time elapsed: ', Time_OVR) # milliseconds

## notebook ends