In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: M Arshad Zahangir Chowdhury

Experimental recognition in IR range using classifiers.

"""

%matplotlib inline 

import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
from ipywidgets import interactive
import seaborn as sns  #heat map
import glob # batch processing of images

if '../../' not in sys.path:
    sys.path.append('../../')

from src.spectral_datasets.IR_datasets import IR_data


from src.misc.utils import *

import datetime

#import metrics to evaluate classifiers
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report



# from sklearn.metrics import roc_curve
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html

from sklearn.model_selection import train_test_split



from sklearn import svm
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier 

from scipy import interpolate
from math import floor, ceil

from sys import getsizeof

from ipywidgets import *
import ipywidgets as widgets
from ipyfilechooser import FileChooser



# Folder the file chooser opens in. The path is relative, so it is resolved
# against the working directory the notebook kernel was started from.
path_exp = "../../data/IR_Experimental_Data/"

In [2]:
def identify(obj):
    """Identify the compound in the spectrum file currently selected in ``fc``.

    Intended as a button-click callback. The steps are:

    1. Read sheet "Sheet1" of the selected Excel file and treat its two
       columns as wavenumbers and absorbance; plot the raw spectrum.
    2. Load the ``IR_data`` reference set restricted to the wavenumber range
       covered by the experimental spectrum.
    3. Train a one-vs-rest linear SVM on a stratified 70/30 split of the
       reference spectra and print training time and held-out accuracy.
    4. Linearly interpolate the experimental spectrum onto the reference
       wavenumber grid, plot it, and print the predicted label.

    Parameters
    ----------
    obj : object
        Argument supplied by the click handler; it is not used.

    Returns
    -------
    None
        Results are reported through printed text and figures only.
    """
    if not fc.selected:
        # Without a selection there is no path to read; bail out with a clear
        # message instead of letting pd.read_excel fail on a missing path.
        print("No file selected. Choose an .xlsx file before pressing Identify.")
        return

    print(fc.selected_filename)
    df_exp = pd.read_excel(fc.selected, sheet_name="Sheet1")
    df_exp.columns = ['wavenumbers', 'absorbance'] # change the column names

    # Pull the columns out once; the reductions below then run on arrays
    # instead of iterating the Series element by element.
    exp_wavenumbers = df_exp['wavenumbers'].to_numpy()
    exp_absorbance = df_exp['absorbance'].to_numpy()
    wn_min = exp_wavenumbers.min()
    wn_max = exp_wavenumbers.max()

    plt.figure(figsize=(16, 3))
    plt.plot(exp_wavenumbers, exp_absorbance)
    print("Min Wavenumber: ", wn_min)
    print("Max Wavenumber: ", wn_max)
    print("Available data points:", len(exp_wavenumbers))
    plt.show()

    # Round the range inwards (ceil of min, floor of max) so the reference
    # grid requested here does not extend past the measured data.
    s = IR_data(data_start = ceil(wn_min), data_end = floor(wn_max), resolution=1, verbosity = False)
    s.load_IR_data()
    X = s.spectra
    y = s.targets
    labels = s.labels
    n_features = s.shoretend_samplesize  # attribute name is spelt this way on IR_data
    wavenumbers = s.frequencies

    TRAIN_SIZE=0.70
    TEST_SIZE=1-TRAIN_SIZE

    train_X, test_X, train_y, test_y = train_test_split(X, y, train_size=TRAIN_SIZE,
                                                       test_size=TEST_SIZE,
                                                       random_state=123,
                                                       stratify=y
                                                       )
    t_start = datetime.datetime.now()

    classifier_OVR = OneVsRestClassifier(SVC(kernel='linear',
                                             C = 500,decision_function_shape = 'ovo',
                                             random_state=1)).fit(train_X, train_y)

    t_end = datetime.datetime.now()
    delta = t_end - t_start
    Time_OVR=delta.total_seconds() * 1000

    print('Training time: ', Time_OVR) # milliseconds

    pred_y = classifier_OVR.predict(test_X)

    # Fraction of held-out reference spectra that were labelled correctly.
    FCA_OVR = np.mean(pred_y == test_y)
    print("Fraction Correct[Accuracy]:", FCA_OVR)

    # Linear interpolation of the measured spectrum onto the reference grid.
    f = interpolate.interp1d(exp_wavenumbers, exp_absorbance)

    dfy_resampled = f(wavenumbers)   # use interpolation function returned by `interp1d`
    dfx_resampled = wavenumbers

    plt.figure(figsize=(16, 3))
    plt.scatter(dfx_resampled,dfy_resampled)

    print("Min Wavenumber (resampled): ", min(dfx_resampled))
    print("Max Wavenumber (resampled): ", max(dfx_resampled))
    # Show this figure explicitly, as is done for the raw spectrum above, so
    # it is rendered from inside the callback and not left open.
    plt.show()

    # A single sample as a row vector; the explicit width also fails loudly if
    # the resampled length does not match what the reference set reports.
    Unknown_X=dfy_resampled.reshape(1, n_features) # (ID,Features)

    pred_unknown_y_OVR = classifier_OVR.predict(Unknown_X)

    print('OVR (SVM-linear) says :', pred_unknown_y_OVR, 'so molecule is ',labels[pred_unknown_y_OVR[0]])

# File picker that starts in the experimental-data folder and lists only
# Excel workbooks.
fc = FileChooser(path=path_exp)
fc.filter_pattern = '*.xlsx'

# Pressing the button runs `identify` on whatever file is selected in `fc`.
btn_identify = widgets.Button(description='Identify')
btn_identify.on_click(identify)

# Last expression of the cell, so the widget row is rendered as its output.
widgets.HBox(children=[btn_identify, fc])

HBox(children=(Button(description='Identify', style=ButtonStyle()), FileChooser(path='/home/reshad812/CODES/GI…

### to-do 
- Make the two plots overlay on each other (matplotlib `notebook` style).
- Add other classifier options.
- Show the information in a text output box instead of printing it to the output window.

## notebook ends