### Imports

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Load the beta-VAE module
import bvae_model as bvae
import spectrum_preprocessing as sp




### Load raw spectra

In [2]:
# Location of the raw spectral CSV file to process; edit this to point at your own file.

path = "C:/Users/Zach/Documents/betaVAE_Training/spectral_data/SMP65#013 21d 920um.csv"

In [3]:
# Keep only the file name; it is reused below to name the output CSV files.
# os.path.basename is used instead of splitting on "/" so that a path written
# with the platform's native separator is handled as well.
filename = os.path.basename(path)
filename

'SMP65#013 21d 920um.csv'

In [4]:
# Read the whole CSV into memory and report its (rows, columns) size.
df = pd.read_csv(filepath_or_buffer=path)
print(f"Dataframe shape: {df.shape}")

Dataframe shape: (49152, 2100)


In [5]:
# Display the loaded table to check the column layout before slicing it below.
df

Unnamed: 0,map_x,map_y,Sample Name,1981.7 - 2095.8,3997.9328286151754,3996.348861567388,3994.7648945196006,3993.1809274718134,3991.5969604240263,3990.0129933762387,...,693.7775669308426,692.1935998830554,690.6096328352678,689.0256657874806,687.4416987396935,685.8577316919059,684.2737646441187,682.6897975963311,681.105830548544,679.5218635007568
0,39338.3,23186.4,21d 920um,0.027348,-0.005780,-0.009205,-0.005201,0.001749,0.002506,-0.004500,...,0.302362,0.193786,0.215947,0.564841,0.866391,0.922009,0.847334,0.667275,0.636428,0.732022
1,39343.7,23186.4,21d 920um,-0.061878,-0.036621,-0.031631,-0.027286,-0.026879,-0.030003,-0.032886,...,0.125865,0.484513,-0.190309,-0.461016,-0.306359,0.025546,0.018538,-0.326393,-0.268965,-0.047151
2,39349.2,23186.4,21d 920um,0.028535,-0.031337,-0.035186,-0.039305,-0.041607,-0.042279,-0.043588,...,0.152207,0.230090,0.447528,0.658963,0.684246,0.506623,0.413041,0.284461,0.207294,0.288107
3,39354.6,23186.4,21d 920um,-0.096200,-0.002418,-0.000432,0.002651,0.004541,0.002584,-0.001339,...,0.401100,0.351473,0.297579,0.172422,0.165574,0.252624,0.262593,0.300748,0.363351,0.354264
4,39360.0,23186.4,21d 920um,-0.020703,0.009201,0.008855,0.010519,0.013614,0.015160,0.012308,...,0.327754,0.398843,0.454724,0.523624,0.460653,0.476717,0.468539,0.420611,0.381434,0.381283
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49147,41397.6,23875.0,21d 920um,0.011488,-0.032712,-0.034940,-0.031057,-0.026917,-0.026859,-0.028916,...,-0.188778,-0.176608,-0.174143,-0.173849,-0.137289,-0.071008,-0.063503,-0.171026,-0.298044,-0.359977
49148,41403.0,23875.0,21d 920um,0.034984,-0.038594,-0.038690,-0.041098,-0.041789,-0.038232,-0.034172,...,-0.088642,0.024736,0.115752,0.143072,0.088615,0.041816,0.073703,0.149174,0.110572,-0.028574
49149,41408.5,23875.0,21d 920um,0.128355,-0.035480,-0.037087,-0.038141,-0.036412,-0.035022,-0.036521,...,-0.122573,0.012233,0.073951,-0.003435,-0.163660,-0.299349,-0.325833,-0.184658,-0.019320,-0.002990
49150,41413.9,23875.0,21d 920um,0.154755,-0.049924,-0.044821,-0.040673,-0.040812,-0.044673,-0.048131,...,-0.307490,-0.120271,0.070240,0.135427,0.070356,0.036945,0.009713,-0.053422,-0.130538,-0.139763


# Sort the wavenumber columns in ascending order and collect the spectra in a dataframe
- necessary for the input to the pipeline below

In [6]:
# Columns from position 4 onwards are taken as the spectral axis; their labels
# are the wavenumbers stored as strings.
# NOTE(review): confirm that every label from position 4 on parses as a float
# for your file; astype(float) raises ValueError otherwise.
spectral_labels = df.columns.values[4:]

# Order the columns by numeric wavenumber, but keep the ORIGINAL label strings.
# Converting the labels to float and back to str can yield text that no longer
# matches the column names (e.g. "4000" -> "4000.0"), which makes the column
# lookup below fail with a KeyError.
ascending = np.argsort(spectral_labels.astype(float))
wavenumber = spectral_labels[ascending]
spectra_df = df[wavenumber].copy()

# Sanity check: the selected column labels convert cleanly to floats.
np.dtype(spectra_df.columns.values.astype(float)[0])

dtype('float64')

### Run the pipeline
`bvae.bvae_pipeline()` returns the interpolated wavenumber array, the interpolated absorbance array, the beta-VAE encodings (L1, L2 and L3), the reconstruction MSE, and the reconstructed spectrum.

In [None]:
# Per-spectrum results, one entry per row of spectra_df (same order).
L1_list = []
L2_list = []
L3_list = []
MSE_list = []
wavenumber_list = []
absorbance_list = []
reconstructed_list = []
failed_rows = []  # positions of the spectra the pipeline could not process

# The wavenumber axis is the same for every row, so convert it only once.
f = spectra_df.columns.values.astype('float32')

for i in range(spectra_df.shape[0]):
    a = spectra_df.iloc[i, :].values
    try:
        wavenumber, absorbance, encodings, mse, reconstructed = bvae.bvae_pipeline(f, a)
        L1 = encodings[0]
        L2 = encodings[1]
        L3 = encodings[2]
    except Exception as e:
        # Best effort: keep going so one bad spectrum does not abort the whole
        # map, but record which row failed. The encodings get the sentinel
        # value 0 and the MSE the sentinel value 100. The spectra themselves
        # are filled in after the loop, once the output length is known.
        # Appending `absorbance` here would silently repeat the previous
        # row's spectrum (or raise NameError on the very first row).
        print(f"Spectrum {i} failed: {e}")
        failed_rows.append(i)
        L1_list.append(0)
        L2_list.append(0)
        L3_list.append(0)
        MSE_list.append(100)
        absorbance_list.append(None)
        reconstructed_list.append(None)
        continue

    L1_list.append(L1)
    L2_list.append(L2)
    L3_list.append(L3)
    MSE_list.append(mse)
    absorbance_list.append(absorbance)
    reconstructed_list.append(reconstructed)

if failed_rows:
    # `wavenumber` now holds the axis returned by the last successful call
    # (or the raw column labels if every row failed); use its length so the
    # placeholders line up with the successfully processed rows.
    n_points = len(wavenumber)
    for i in failed_rows:
        absorbance_list[i] = np.full(n_points, np.nan)  # no valid spectrum
        reconstructed_list[i] = np.zeros(n_points)
    print(f"{len(failed_rows)} of {spectra_df.shape[0]} spectra failed; placeholders were inserted")

airpls: max iteration reached!


# Save results to dataframe

In [None]:
# One row per processed spectrum, one column per wavenumber returned by the
# pipeline, followed by the map coordinates taken from the raw table.
hyperspectrum = pd.DataFrame(absorbance_list)
hyperspectrum.columns = wavenumber
hyperspectrum = hyperspectrum.assign(
    map_x=df['map_x'].values,
    map_y=df['map_y'].values,
)

In [None]:
# Same layout as the hyperspectrum table, but holding the reconstructed spectra.
reconstructed = pd.DataFrame(reconstructed_list)
reconstructed.columns = wavenumber
reconstructed = reconstructed.assign(
    map_x=df['map_x'].values,
    map_y=df['map_y'].values,
)

In [None]:
# Table of the map coordinates, the three encodings and the reconstruction
# MSE, with one row per spectrum. Columns are added in the order listed.
bvae_df = pd.DataFrame()
for column_name, column_values in (
    ('map_x', df['map_x'].values),
    ('map_y', df['map_y'].values),
    ('L1', L1_list),
    ('L2', L2_list),
    ('L3', L3_list),
    ('MSE', MSE_list),
):
    bvae_df[column_name] = column_values

# Save to csv files for viewing in Quasar

In [None]:

# Folder for the encodings table; it is created if it does not exist yet.
path_to_save = 'C:/Users/Zach/Documents/BVAE_processed/'
os.makedirs(path_to_save, exist_ok=True)
# No index=False here, so the row index is written as the first CSV column.
bvae_csv = path_to_save + 'bvae_' + filename
bvae_df.to_csv(path_or_buf=bvae_csv)

In [None]:
# Folder for the hyperspectrum table; it is created if it does not exist yet.
path_to_save = "C:/Users/Zach/Documents/Hyperspectra/"
os.makedirs(path_to_save, exist_ok=True)
# Pass the folder and the file name to os.path.join as separate components;
# joining a single pre-concatenated string does nothing.
file = os.path.join(path_to_save, 'hyperspectrum_' + filename)
hyperspectrum.to_csv(file, index=False)

In [None]:
# Folder for the reconstructed spectra; it is created if it does not exist yet.
path_to_save = "C:/Users/Zach/Documents/Reconstructions/"
os.makedirs(path_to_save, exist_ok=True)
reconstructed_csv = path_to_save + 'reconstructed_' + filename
reconstructed.to_csv(path_or_buf=reconstructed_csv, index=False)