# Raman Analysis

## Setting up Python

### Importing necessary libraries

In [54]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import sys
from PIL import Image
from scipy.signal import find_peaks, peak_widths
from scipy.optimize import curve_fit
import statistics
from ipywidgets import *

%matplotlib widget
sys.path.append('Scripts')
from peak_fitting import *

### Importing the data

In [55]:
def open_pickled_stats(name):
    infile = open(f"Data/pickled_for_stats/{name}_for_stats.pk", 'rb')
    data = pickle.load(infile)
    infile.close()
    return data
hmds = open_pickled_stats('hmds')
boot = open_pickled_stats('boot')
gs = open_pickled_stats('gs')
gl = open_pickled_stats('gl')
a4_multi = open_pickled_stats('a4_multi')
a4_double = open_pickled_stats('a4_double')
a4_single = open_pickled_stats('a4_single')
a6_multi = open_pickled_stats('a6_multi')
a6_double = open_pickled_stats('a6_double')
a6_single = open_pickled_stats('a6_single')
sample_dict = {'hmds':hmds, 'boot':boot, 'gs':gs, 'gl':gl, 'a4_multi':a4_multi, 
               'a4_double':a4_double, 'a4_single':a4_single, 'a6_multi':a6_multi,
               'a6_double':a6_double, 'a6_single':a6_single}

## Organisation of the data

The data for each type of sample is saved as a dictionary. The only important ones for the purpose of the analysis are 'data' and 'spectra'.

### The Raman spectra

Inputs: sample -> 'spectra' -> temperature

Outputs: list of spectra -> spectrum

To get the list of the Raman spectra for the hmds sample at 500 °C: use hmds['spectra'][500]. As this is a very long output the next cell only allows you to see how many spectra exist for different sample-temperature combinations.

In [59]:
def show_number_spectra(name, temperature):
    try:
        sample = sample_dict[name]
        n_samples = len(sample['spectra'][temperature])
        print(f'The sample {name} has {n_samples} Raman spectra taken at this temperature')
    except:
        print(f'There are no spectra available for {name} at {temperature}')
interact(show_number_spectra, name=list(sample_dict.keys()), temperature=[400, 500, 600, 700])

interactive(children=(Dropdown(description='name', options=('hmds', 'boot', 'gs', 'gl', 'a4_multi', 'a4_double…

<function __main__.show_number_spectra(name, temperature)>

Now from these spectra you can select which one you want to plot by adding a number: hmds['spectra'][500][1].
The spectra are saved as four columns pandas dataframe. This is because the Raman spectrometer wasn't set up to take a full Raman spectrum but instead two spectra centred around the G and 2D peaks.
Wavenumber1 and Intensity1 contain the D and G peaks, and Wavenumber2 and Intensity2 the 2D peak.

In [57]:
hmds['spectra'][500][1]

Unnamed: 0,Wavenumber1,Intensity1,Wavenumber2,Intensity2
0,2035.689453,2346.869873,3140.073242,2562.264160
1,2034.679688,2416.259766,3139.244141,2468.816895
2,2033.671875,2448.045898,3138.414063,2465.372314
3,2032.662109,2360.907959,3137.584961,2495.075928
4,2031.652344,2397.645996,3136.754883,2455.473633
...,...,...,...,...
1016,908.365234,3684.288574,2214.541016,1887.888794
1017,907.140625,3724.251709,2213.537109,1847.620728
1018,905.916016,3631.581543,2212.533203,1837.244385
1019,904.689453,3687.854248,2211.529297,1829.361206


To access the data in each column you have to use yet another pair of brackets or a dot.

In [58]:
x = hmds['spectra'][500][1]['Wavenumber1']
y = hmds['spectra'][500][1].Intensity1
plt.plot(x,y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fa187d35588>]

## Functions for analysis

### Getting an average spectrum
Given a list of spectra, for instance hmds['spectra'][400], this returns an average

In [17]:
def get_average_spectrum(spectra):
    df = spectra[0]
    for spectrum in spectra:
        df = df.add(spectrum)
    df = df.divide(len(spectra)+1)
    return df

In [18]:
get_average_spectrum(hmds['spectra'][400])

Unnamed: 0,Wavenumber1,Intensity1,Wavenumber2,Intensity2
0,2035.658203,638.550323,3140.048828,396.562980
1,2034.650391,620.927538,3139.219727,396.228061
2,2033.640625,636.786130,3138.390625,398.503084
3,2032.630859,629.242054,3137.560547,390.732739
4,2031.623047,637.112376,3136.730469,395.685949
...,...,...,...,...
1016,908.330078,927.361098,2214.511719,440.560699
1017,907.103516,920.117994,2213.507813,426.074109
1018,905.878906,936.907077,2212.503906,418.232564
1019,904.654297,916.519708,2211.500000,443.856581


### Statistical analysis of peak data

In [19]:
def get_data_stats(data):
    spec_peaks = ['d_peak', 'g_peak', 'd2_peak']
    all_peaks = {}
    spec_peaks_mean = []
    spec_peaks_dev = []
    spec_peaks_min = []
    spec_peaks_max = []
    spec_ratios = {}
    params = ["pos", "width", "intensity"]
    for peak in spec_peaks:
        peaks = [getattr(i, peak) for i in data]
        all_peaks[peak] = {j: [peak[j] for peak in peaks] for j in params}
        spec_peaks_mean.append({j: statistics.mean(all_peaks[peak][j]) for j in params})
        spec_peaks_dev.append({j: statistics.pstdev(all_peaks[peak][j]) for j in params})
        spec_peaks_min.append({j: min(all_peaks[peak][j]) for j in params})
        spec_peaks_max.append({j: max(all_peaks[peak][j]) for j in params})
    spec_mean = spec_data(*spec_peaks_mean)
    spec_dev = spec_data(*spec_peaks_dev)
    spec_min = spec_data(*spec_peaks_min)
    spec_max = spec_data(*spec_peaks_max)
    id_ig_s = [a/b for (a,b) in zip(all_peaks['d_peak']['intensity'], all_peaks['g_peak']['intensity'])]
    i2d_ig_s = [a/b for (a,b) in zip(all_peaks['d2_peak']['intensity'], all_peaks['g_peak']['intensity'])]
    spec_ratios['id_ig'] = {'mean': statistics.mean(id_ig_s), 'dev': statistics.pstdev(id_ig_s), 
                             'min': min(id_ig_s), 'max': max(id_ig_s)}
    spec_ratios['i2d_ig'] = {'mean': statistics.mean(i2d_ig_s), 'dev': statistics.pstdev(i2d_ig_s), 
                             'min': min(i2d_ig_s), 'max': max(i2d_ig_s)}
    return (spec_mean, spec_dev, spec_min, spec_max, spec_ratios)