In [160]:
import glob
import os
from math import *

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from IPython.display import display
from pysptools import distance
from pysptools import spectro as sp
from sklearn import preprocessing

In [175]:
### https://pysptools.sourceforge.io/distance.html
### https://dataaspirant.com/five-most-popular-similarity-measures-implementation-in-python/
### https://towardsdatascience.com/four-ways-to-quantify-synchrony-between-time-series-data-b99136c4a9c9
### https://towardsdatascience.com/classical-least-squares-method-for-quantitative-spectral-analysis-with-python-1926473a802c

class Widegets:
    """Observer callbacks that keep the two wavelength sliders a minimum distance apart.

    Both callbacks read and write the module-level ``minWavelength`` and
    ``maxWavelength`` objects (anything exposing a ``value`` attribute), so
    those names must exist before a callback fires.  The callbacks are static:
    they can be passed as ``Widegets.on_value_change_min`` or called on an
    instance.
    """

    # Smallest allowed distance between the two slider values.
    MIN_GAP = 100

    @staticmethod
    def on_value_change_min(change):
        """React to a new lower-slider value.

        Args:
            change: mapping whose ``'new'`` entry is the value just set on the
                lower slider.

        If the new value is closer than ``MIN_GAP`` to the upper slider, the
        lower slider is pulled back to ``maxWavelength.value - MIN_GAP``.
        """
        new_min = change['new']
        if new_min + Widegets.MIN_GAP > maxWavelength.value:
            minWavelength.value = maxWavelength.value - Widegets.MIN_GAP

    @staticmethod
    def on_value_change_max(change):
        """React to a new upper-slider value.

        Args:
            change: mapping whose ``'new'`` entry is the value just set on the
                upper slider.

        If the new value is closer than ``MIN_GAP`` to the lower slider, the
        upper slider is pushed up to ``minWavelength.value + MIN_GAP``.
        """
        new_max = change['new']
        if minWavelength.value > new_max - Widegets.MIN_GAP:
            maxWavelength.value = minWavelength.value + Widegets.MIN_GAP
        
        
class FolderAndFileProcessing:
    """Static helpers for finding spectrum files, reading columns and exporting tables."""

    @staticmethod
    def ReadFilePaths(source, ext):
        """Return every path below ``source`` (searched recursively) ending in ``.ext``.

        Args:
            source: directory to search, as a string.
            ext: file extension without the leading dot, e.g. ``'txt'``.

        Returns:
            list of path strings as produced by ``glob.glob``; empty when
            nothing matches.
        """
        # Kept as plain string concatenation so the returned paths stay
        # '/'-separated, which the callers rely on when they split on '/'.
        return glob.glob(source + "/**/*." + ext, recursive=True)

    @staticmethod
    def ReadFile(file, seperation):
        """Load a delimited text file into a DataFrame.

        Args:
            file: path of the file to read.
            seperation: column delimiter, forwarded to ``pd.read_csv`` as ``sep``.
        """
        return pd.read_csv(file, sep=seperation)

    @staticmethod
    def GetValuesFromFile(file, seperation, column):
        """Read ``file`` and return only the column named ``column``.

        Raises:
            KeyError: if the file has no column called ``column``.
        """
        fileData = FolderAndFileProcessing.ReadFile(file, seperation)
        return fileData[column]

    @staticmethod
    def WriteDictToExcel(targetPath, filename, dictionary):
        """Write ``dictionary`` to the Excel file ``filename`` inside ``targetPath``.

        Args:
            targetPath: output directory; created (with parents) when missing.
                An empty string means the current working directory.
            filename: name of the Excel file to create.
            dictionary: any data accepted by ``pd.DataFrame.from_dict``.
        """
        df = pd.DataFrame.from_dict(dictionary)
        # os.makedirs('') raises, so only create a directory when one is named.
        if targetPath:
            os.makedirs(targetPath, exist_ok=True)
        # join() inserts a separator only when targetPath lacks a trailing one,
        # so both "out/" and "out" end up writing to out/<filename>.
        df.to_excel(os.path.join(targetPath, filename))
        
class SpectraComparision:
    """Rank reference spectra against one measured spectrum with several metrics.

    Every ranking method slices both the measured spectrum and each reference
    spectrum with ``[minIndex:maxIndex]``, scores the pair, and returns a list
    of ``(mineral_name, score)`` tuples sorted so that the best match comes
    first (ascending for distance-like scores, descending for similarity-like
    scores).

    ``mineral_name`` is the part of the reference file's base name before its
    first ``_``.  Scores are stored per name, so when several files share a
    name only the score of the last one processed is kept.

    Common arguments:
        fileList: sequence of reference spectra.
        spectraValue: the measured spectrum.
        filenameList: reference file paths, parallel to ``fileList``.
        minIndex, maxIndex: slice bounds applied to every spectrum.

    ``SAM``, ``chebyshev`` and ``SpectralInformationDivergence`` delegate to
    the module-level ``distance`` object (``pysptools.distance``).

    All methods are static; call them on the class, e.g.
    ``SpectraComparision.CrossCorrelation(...)``.
    """

    @staticmethod
    def _mineral_name(path):
        """Return the label for a reference file: base name up to its first '_'."""
        return path.split('/')[-1].split('_')[0]

    @staticmethod
    def _rank(fileList, spectraValue, filenameList, minIndex, maxIndex, scorer, descending=False):
        """Score every reference with ``scorer(measured, reference)`` and sort the result."""
        measured = spectraValue[minIndex:maxIndex]  # identical for every reference
        mineralScore = {}
        for index, idealReflectance in enumerate(fileList):
            score = scorer(measured, idealReflectance[minIndex:maxIndex])
            mineralScore[SpectraComparision._mineral_name(filenameList[index])] = score
        return sorted(mineralScore.items(), key=lambda item: item[1], reverse=descending)

    @staticmethod
    def _rmse(measured, reference):
        """Return the root of the mean squared difference of two spectra."""
        return np.sqrt(np.mean((measured - reference) ** 2))

    @staticmethod
    def MeanSquareRootComparision(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by root-mean-square difference; smallest difference first."""
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            SpectraComparision._rmse)

    @staticmethod
    def MeanSquareRootComparisionNormalized(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by root-mean-square difference; smallest difference first.

        NOTE(review): despite the name, no normalisation has ever been applied
        here; the computation is the same as ``MeanSquareRootComparision``.
        Confirm which normalisation was intended before relying on the name.
        """
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            SpectraComparision._rmse)

    @staticmethod
    def CrossCorrelation(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by the ``np.corrcoef`` correlation coefficient; highest first."""
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            lambda measured, reference: np.corrcoef(measured, reference)[0, 1],
            descending=True)

    @staticmethod
    def CosineSimilarityContinumRemoved(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by ``cosine_similarity``; highest similarity first."""
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            SpectraComparision.cosine_similarity, descending=True)

    @staticmethod
    def computeSimilarityContinumRemoved(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by ``compute_similarity`` (a difference score); smallest first."""
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            SpectraComparision.compute_similarity)

    @staticmethod
    def SAM(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by ``distance.SAM`` (spectral angle); smallest angle first.

        A smaller angle means a closer match, so the list is sorted ascending
        like the other distance-based rankings.  It used to be sorted
        descending, which put the worst match first.
        """
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            lambda measured, reference: distance.SAM(measured, reference))

    @staticmethod
    def chebyshev(fileList, spectraValue, filenameList, minIndex, maxIndex):
        """Rank by ``distance.chebyshev``; smallest distance first."""
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            lambda measured, reference: distance.chebyshev(measured, reference))

    @staticmethod
    def SpectralInformationDivergence(fileList, spectraValue, fileListContinumRemoval, waveLength,
                                      minIndex, maxIndex, filenameList=None):
        """Rank by ``distance.SID``; smallest divergence first.

        Args:
            fileListContinumRemoval: accepted for compatibility; not used.
            waveLength: accepted for compatibility; not used.
            filenameList: reference file paths, parallel to ``fileList``.  The
                body always needed this list but the signature never offered
                it, so it is now an optional trailing argument.

        Raises:
            ValueError: if ``filenameList`` is not supplied.
        """
        if filenameList is None:
            raise ValueError(
                "SpectralInformationDivergence needs filenameList to label the scores")
        return SpectraComparision._rank(
            fileList, spectraValue, filenameList, minIndex, maxIndex,
            lambda measured, reference: distance.SID(measured, reference))

    @staticmethod
    def compute_similarity(ref_rec, input_rec, weightage=(0.33, 0.33, 0.33)):
        """Return a weighted difference score between two equally long records.

        Three absolute differences are combined (0.0 means "identical"):
        the zero-lag correlation of the records, the zero-lag correlation of
        their FFTs, and their summed squares.

        Args:
            ref_rec: first record; converted with ``np.array``.
            input_rec: second record; converted with ``np.array``.
            weightage: three weights applied to the time, frequency and power
                differences, in that order.

        Returns:
            The weighted sum as a Python ``float``.

        Raises:
            TypeError: if the records differ in length, so that the
                correlations do not reduce to a single value.
        """
        ref_rec = np.array(ref_rec)
        input_rec = np.array(input_rec)

        ## Time domain similarity
        ref_time = np.correlate(ref_rec, ref_rec)
        inp_time = np.correlate(ref_rec, input_rec)
        diff_time = abs(ref_time - inp_time)

        ## Freq domain similarity (each FFT is computed once and reused)
        ref_fft = np.fft.fft(ref_rec)
        inp_fft = np.fft.fft(input_rec)
        ref_freq = np.correlate(ref_fft, ref_fft)
        inp_freq = np.correlate(ref_fft, inp_fft)
        diff_freq = abs(ref_freq - inp_freq)

        ## Power similarity
        ref_power = np.sum(ref_rec**2)
        inp_power = np.sum(input_rec**2)
        diff_power = abs(ref_power - inp_power)

        total = weightage[0]*diff_time + weightage[1]*diff_freq + weightage[2]*diff_power
        if total.size != 1:
            raise TypeError("only records of equal length can be reduced to one score")
        # Index the single element explicitly: float() on a 1-element array is
        # deprecated in recent NumPy releases.
        return float(total[0])

    @staticmethod
    def square_rooted(x):
        """Return the square root of the sum of squares of ``x``, rounded to 3 decimals."""
        return round(sqrt(sum([a*a for a in x])), 3)

    @staticmethod
    def cosine_similarity(x, y):
        """Return the cosine similarity of ``x`` and ``y``, rounded to 3 decimals.

        The two norms come from ``square_rooted`` and are therefore already
        rounded to 3 decimals before the division.
        """
        numerator = sum(a*b for a, b in zip(x, y))
        denominator = SpectraComparision.square_rooted(x) * SpectraComparision.square_rooted(y)
        return round(numerator/float(denominator), 3)

### Variable Declarations

In [162]:
## Reference-library spectra: folder to search and file extension
txtPath = "../data/LR-Raman_Processed/"
extensionTxt = 'txt'

## Measured spectra that get scored against the library (also .txt files)
espPath = "../data/Vulcano Raman Processed/"
extensionEsp = 'txt'

## Sliders describing the wavelength range to compare (both span 160-4000)
minWavelength = widgets.IntSlider(value=160, min=160, max=4000, description='min wav')
maxWavelength = widgets.IntSlider(value=4000, min=160, max=4000, description='max wav')

## Each slider notifies its callback so the two values never get too close
minWavelength.observe(Widegets.on_value_change_min, names='value')
maxWavelength.observe(Widegets.on_value_change_max, names='value')

### Select the wavelength range

In [173]:
# Show both sliders so the wavelength range can be picked interactively.
display(minWavelength, maxWavelength)

IntSlider(value=160, description='min wav', max=4000, min=160)

IntSlider(value=1300, description='max wav', max=4000, min=160)

In [165]:
# Turn the selected slider values into positional slice bounds.  Both sliders
# start at 160, so subtracting 160 gives a 0-based offset (assumes row 0 of
# every spectrum corresponds to 160 - TODO confirm); the +1 makes the upper
# slider value inclusive when used as a slice end.
minIndex, maxIndex = minWavelength.value - 160, maxWavelength.value - 160 + 1

In [166]:
# Echo the upper slice bound as a quick sanity check of the slider selection.
maxIndex

1141

### Populating paths

In [167]:
# Recursively collect the reference-library files and the measured-sample files.
txt_files = FolderAndFileProcessing.ReadFilePaths(source=txtPath, ext=extensionTxt)
esp_files = FolderAndFileProcessing.ReadFilePaths(source=espPath, ext=extensionEsp)

### Loading Ideal Dataset

In [168]:
# Load the 'reflectance' column of every library file; the resulting list is
# parallel to txt_files (entry i belongs to txt_files[i]).
idealFileList = [
    FolderAndFileProcessing.GetValuesFromFile(libraryPath, ' ', 'reflectance')
    for libraryPath in txt_files
]

In [169]:
# Preview the label derived from the first library path: its base file name
# up to (not including) the first underscore.
txt_files[0].split('/')[-1].split('_')[0]

'Antlerite'

### results

In [74]:
# Rank the library against every measured spectrum with the correlation score
# and save each ranking as an .xlsx file in a "CrossCorrelation_1300" folder
# that mirrors the layout of the input folder.
for samplePath in esp_files:
    testData = FolderAndFileProcessing.GetValuesFromFile(samplePath, ' ', 'reflectance')
    dictMineralValue = SpectraComparision.CrossCorrelation(idealFileList, testData, txt_files, minIndex, maxIndex)
    # Swap the input folder name for the output folder name, then separate the
    # relocated path into its directory parts and its file name.
    relocatedPath = samplePath.replace('/Vulcano Raman Processed', '/CrossCorrelation_1300')
    *folderParts, sampleName = relocatedPath.split('/')
    targetfilename = sampleName.replace('.txt', '.xlsx')
    targetPath = '/'.join(folderParts) + '/'
    FolderAndFileProcessing.WriteDictToExcel(targetPath, targetfilename, dictMineralValue)

### Plot images with the results

In [171]:
# For every measured spectrum: rank the library by cosine similarity, save the
# ranking as an .xlsx file in a "CosineSimilarity_1300" folder that mirrors the
# input folder layout, and save a plot of the measured spectrum together with
# its best-ranked library spectra next to it.
for samplePath in esp_files:
    testData = FolderAndFileProcessing.GetValuesFromFile(samplePath, ' ', 'reflectance')
    dictMineralValue = SpectraComparision.CosineSimilarityContinumRemoved(idealFileList, testData, txt_files, minIndex, maxIndex)

    # Swap the input folder name for the output folder name, then separate the
    # relocated path into its directory parts and its file name.
    relocatedPath = samplePath.replace('/Vulcano Raman Processed', '/CosineSimilarity_1300')
    *folderParts, sampleName = relocatedPath.split('/')
    targetfilename = sampleName.replace('.txt', '.xlsx')
    targetPath = '/'.join(folderParts) + '/'
    FolderAndFileProcessing.WriteDictToExcel(targetPath, targetfilename, dictMineralValue)

    # Collect the (at most) six best matches before any figure is created.
    # Slicing instead of indexing [0]..[5] keeps small libraries from raising
    # IndexError.
    topMatches = []
    for mineral, _score in dictMineralValue[:6]:
        # First library file whose path contains the mineral name
        # (case-insensitive).
        libraryIndex = next(i for i, s in enumerate(txt_files) if mineral.lower() in s.lower())
        # idealFileList is parallel to txt_files, so the already-loaded
        # spectrum is reused instead of reading the file again.
        topMatches.append((mineral, idealFileList[libraryIndex]))

    xaxis = list(range(minWavelength.value, maxWavelength.value + 1))
    plt.figure(figsize=(24, 12))
    plt.plot(xaxis, testData[minIndex:maxIndex], label=targetfilename.replace('.xlsx', ''))
    for mineral, signal in topMatches:
        plt.plot(xaxis, signal[minIndex:maxIndex], label=mineral)
    plt.legend(loc="upper left")
    # Replace only the file extension; a plain replace('txt', 'png') would
    # also rewrite any "txt" that appears elsewhere in the path.
    plt.savefig(os.path.splitext(relocatedPath)[0] + '.png', dpi=500)
    plt.close()