# Data Analysis
This program is used in conjunction with the "Injection Program" notebook to analyze spectroscopy data, using pandas and matplotlib.
The first part of this notebook focuses on pre-processing techniques. The second half (not yet available) is dedicated to the machine learning implementation to classify our datasets.

### PART 1: Pre-Processing

Select which dataset to analyze

In [85]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import savgol_filter as sgf
import matplotlib.cm as cm
import matplotlib.animation as animation

%matplotlib qt

exp_name = input("Experiment name: ").strip()
df = pd.read_csv("../Scan CSV Files/{}.csv".format(exp_name))
df.drop("Unnamed: 0", axis=1, inplace=True)
df.set_index("Wavelengths", inplace=True)

chemical = input("What chemical: ")

Experiment name: 11
What chemical: Water


##### Spectra

In [84]:
%matplotlib qt

fig = plt.figure(figsize=(14,7))
reg = fig.add_subplot(111, title="{}: Experiment Spectra".format(chemical), xlabel="Wavelength (nm)", ylabel="Intensity (A.u.)")

sliced = [df.index[500], df.index[1200]]
colors = cm.rainbow(np.linspace(0, 1, len(df.columns)))

intensities = [sgf(list(df.loc[sliced[0]:sliced[1], df.columns[i]]), window_length=101, polyorder=2) for i in range(len(df.columns))]
yticks = np.linspace(int(np.amin(np.array(intensities))*1.1), int(np.amax(np.array(intensities))*1.1), 10)


def animate(i): 
    reg.clear()
    reg.plot(intensities[i], color=colors[i])
    reg.set_yticks(yticks)
    reg.set_yticklabels([int(i) for i in yticks])
    reg.legend([df.columns[i]])

ani = animation.FuncAnimation(fig, animate, interval=1)
plt.plot(df.intensities[0])
plt.plot(df.intensities[-1])
plt.show()

AttributeError: 'DataFrame' object has no attribute 'intensities'

##### Fourier Transform

In [89]:
f = 1
fig = plt.figure(figsize=(14,7))
# reg = fig.add_subplot(121, title="{} Experiment Spectra".format(chemical), xlabel="Wavelength (nm)", ylabel="Intensity (A.u.)")
ft = fig.add_subplot(111, title="{} Experiment Fourier transform".format(chemical), xlabel="time (s)")

sliced = [df.index[500], df.index[1200]]
# reg.plot( df.loc[sliced[0]:sliced[1], :])
# reg.plot( df.loc[sliced[0]:sliced[1], '750.0':'810.0'])

# taking the transpose means you get the fourier transform with time axis
ft_data = np.fft.fft( (df.loc[sliced[0]:sliced[1], :]).transpose() ) 
# ft_data = np.fft.fft( (df.loc[sliced[0]:sliced[1], '750.0':'810.0']).transpose() ) 

ft.plot(np.real(ft_data[:,f]))
ft.plot(np.imag(ft_data[:,f]))

# =========== Figure Layout ============ #

# axes titles
ft.set_xticks(np.linspace(0, len(df.columns), 11)) # 11 ticks
ft.set_xticklabels([df.columns[i] for i in range(len(df.columns)) if i%(int(len(df.columns)/10)) == 0]) 
# ft.set_xticks(np.linspace(0, 600, 11)) # 11 ticks
# ft.set_xticklabels([df.columns[i] for i in range(7500,8100) if i%(int(600/10)) == 0]) 

if input("Save figure?: ").lower() != "y":
    pass
else: 
    plt.savefig("../Analysis Images/{}_fourier.png".format(exp_name))

Save figure?: y


##### Phase Calculation

In [90]:
R = np.real(ft_data[:, 1])
I = np.imag(ft_data[:, 1])

# Calculating Phase

phase = I / (R ** 2 + I ** 2) ** 0.5
phase = sgf(phase, window_length=31, polyorder=3)

# Normalization

phase = phase/np.sqrt(np.sum(phase**2))

# Plotting

fig = plt.figure(figsize=(14,7))

# ft = fig.add_subplot(121, title="{} Experiment Fourier Transform".format(chemical), xlabel="time (s)", ylabel="Intensity (A.u.)")
ph = fig.add_subplot(111, title="{} Experiment Phase".format(chemical), xlabel="time (s)")

# ft.plot(np.real(ft_data[:,f]))
# ft.plot(np.imag(ft_data[:,f]))

ph.plot(phase)

# ft.set_xticks(np.linspace(0, len(df.columns), 11))
# ft.set_xticklabels([df.columns[i] for i in range(len(df.columns)) if i%int(len(df.columns)/10) == 0])

ph.set_xticks(np.linspace(0, len(df.columns), 11))
ph.set_xticklabels([df.columns[i] for i in range(len(df.columns)) if i%int(len(df.columns)/10) == 0])

# ft.set_xticks(np.linspace(0, 600, 11)) # 11 ticks
# ft.set_xticklabels([df.columns[i] for i in range(7500,8100) if i%(int(600/10)) == 0])


# ph.set_xticks(np.linspace(0, 600, 11)) # 11 ticks
# ph.set_xticklabels([df.columns[i] for i in range(7500,8100) if i%(int(600/10)) == 0])

if input("Save figure?: ").lower() != "y":
    pass
else: 
    plt.savefig("../Analysis Images/{}_phase.png".format(exp_name))

Save figure?: y


##### Phase Derivative

In [91]:
phase_deriv = np.diff(phase)
phase_deriv = sgf(phase_deriv, window_length=31, polyorder=3)

# Normalization

phase_deriv = phase_deriv/np.sqrt(np.sum(phase_deriv**2))

# Plotting

fig = plt.figure(figsize=(14,7))

# ph = fig.add_subplot(121, title="{} Experiment Phase".format(chemical), xlabel="time (s)", ylabel="Intensity (A.u.)")
pd = fig.add_subplot(111, title="{} Experiment Phase Derivative".format(chemical), xlabel="time (s)")

# ph.plot(phase)
pd.plot(phase_deriv)

pd.set_xticks(np.linspace(0, len(df.columns), 11))
pd.set_xticklabels([df.columns[i] for i in range(len(df.columns)) if i%int(len(df.columns)/10) == 0])
# ph.set_xticks(np.linspace(0, len(df.columns), 11))
# ph.set_xticklabels([df.columns[i] for i in range(len(df.columns)) if i%int(len(df.columns)/10) == 0])

# pd.set_xticks(np.linspace(0, 600, 11)) # 11 ticks
# pd.set_xticklabels([df.columns[i] for i in range(7500,8100) if i%(int(600/10)) == 0])
# ph.set_xticks(np.linspace(0, 600, 11)) # 11 ticks
# ph.set_xticklabels([df.columns[i] for i in range(7500,8100) if i%(int(600/10)) == 0])

if input("Save figure?: ").lower() != "y":
    pass
else: 
    plt.savefig("../Analysis Images/{}_phase_deriv.png".format(exp_name))

Save figure?: y


### PART 2: Machine Learning Framework