# Example of loading and processing raw EEM data


In [1]:
import pyeem

pyeem version 0.1


In [2]:
# for development - remove when example notebook is complete
def reload_pyeem():
    """Reload ``pyeem.data_process`` and then ``pyeem`` itself.

    Development helper: lets edits to the package source be picked up
    without restarting the kernel. Returns ``None``.
    """
    from importlib import reload

    # Same order as before: the submodule first, then the top-level package.
    for module in (pyeem.data_process, pyeem):
        reload(module)


reload_pyeem()

pyeem version 0.1


### Step 1 - generate a pandas dataframe with the required meta data

In [3]:
# Load the sample description spreadsheet into a DataFrame.
# A forward slash is used so the path works on every OS; the previous
# 'example_data\Description...' form relied on the invalid escape sequence "\D"
# and only resolved on Windows.
meta_data = pyeem.load_eem_meta_data('example_data/Description_Example.xlsx')

- The columns 'File_Name' and 'Folder' are required; they point to the data files containing the EEMs to be loaded.
- 'Blank' and 'Raman_Area' are required if `pyeem.blank_subtract` and `pyeem.raman_normalize` functions will be used.
- All other columns are optional, as many columns as needed to describe the data may be included.

In [4]:
# Show the metadata table (bare last expression, so the notebook renders it).
meta_data

Unnamed: 0,File_Name,Folder,Blank,Raman_Area,Desc,Type
0,ZEF0119_Cig_5ugmL,20180227,20180227_BCycHex0p5sec,1146.3,Cigarette 5 µg/mL,Sample
1,ZEF0132_Diesel_5ugmL,20180227,20180227_BCycHex0p5sec,1146.3,Diesel 5 µg/mL,Sample
2,ZEF00134_Wood_5ugmL,20180227,20180227_BCycHex0p5sec,1146.3,Wood Smoke 5 µg/mL,Sample
3,20180227_BCycHex0p5sec,20180227,20180227_BCycHex0p5sec,1146.3,2018-01-27 Cyclohexane Blank,Blank


### Step 2 - Initialize an h5 file and save the meta data in it.

In [5]:
# Create the h5 database file and store the metadata table in it.
# overwrite=True: the log below reports an existing "EEM_data.h5" being overwritten.
pyeem.init_h5_database("EEM_data.h5", meta_data, overwrite=True)

overwriting EEM_data.h5


In [6]:
# Collect the EEM data files from under 'example_data/' into the database.
# The log below reports the 'raw_eems' and 'eems' datasets being saved.
pyeem.load_eems("EEM_data.h5", 'example_data/')

EEM data collection complete, final data shape (Sample x Ex+1 x Em+1): (4, 251, 152)
Dataset saved: raw_eems ... Shape =  (4, 251, 152)
Dataset saved: eems ... Shape =  (4, 251, 152)


In [7]:
# Blank subtraction (uses the 'Blank' metadata column described above).
# The log below reports 'blanks_subtracted' being saved and 'eems' being updated.
pyeem.blank_subtract("EEM_data.h5")

Dataset saved: blanks_subtracted ... Shape =  (4, 251, 152)
Updating dataset: eems
Dataset saved: eems ... Shape =  (4, 251, 152)


In [8]:
# Scatter removal. The file name must match the case used when the database
# was created ("EEM_data.h5"); 'EEM_Data.h5' only resolves on case-insensitive
# filesystems.
pyeem.apply_cleanscan("EEM_data.h5")

Removing Scatter


100%|█████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.40s/it]


Dataset saved: scatter_removed ... Shape =  (4, 251, 152)
Dataset saved: excised_values ... Shape =  (4, 251, 152)
Updating dataset: eems
Dataset saved: eems ... Shape =  (4, 251, 152)


In [9]:
# Smooth the spectra. The file name must match the case used when the database
# was created ("EEM_data.h5"); 'EEM_Data.h5' only resolves on case-insensitive
# filesystems.
pyeem.apply_spectrasmooth("EEM_data.h5")

100%|████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 202.69it/s]


Finished smoothing, negative values set to zero
Dataset saved: eems_smooth ... Shape =  (4, 251, 152)
Updating dataset: eems
Dataset saved: eems ... Shape =  (4, 251, 152)


In [10]:
# Crop the EEMs to the given excitation ('ex') and emission ('em') bounds.
# The file name must match the case used when the database was created
# ("EEM_data.h5"); 'EEM_Data.h5' only resolves on case-insensitive filesystems.
# NOTE(review): the 'ex' bounds are given high-to-low (500, 224), which the log
# below accepts -- confirm this ordering is what crop_eems expects.
pyeem.crop_eems("EEM_data.h5", crop_spec={'ex': (500, 224), 'em': (245.917, 572.284)})

EEMs cropped according to crop_spec
{'ex': (500, 224), 'em': (245.917, 572.284), 'ex_ind': (0, 138), 'em_ind': (0, 142)}
Starting shape (4, 251, 152) (Sample x Em+1 x Ex+1)
Cropped shape (4, 144, 140) (Sample x Em+1 x Ex+1)
Dataset saved: eems_cropped ... Shape =  (4, 144, 140)
Updating dataset: eems
Dataset saved: eems ... Shape =  (4, 144, 140)


In [11]:
# Raman normalization (uses the 'Raman_Area' metadata column described above).
# The file name must match the case used when the database was created
# ("EEM_data.h5"); 'EEM_Data.h5' only resolves on case-insensitive filesystems.
pyeem.raman_normalize("EEM_data.h5")

100%|█████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 22.06it/s]


Dataset saved: eems_ru ... Shape =  (4, 144, 140)
Updating dataset: eems
Dataset saved: eems ... Shape =  (4, 144, 140)


##  As of 5/30, the basic data pre-processing functions are working as planned. 
I still need to:
 * Create a log of the order in which the processing functions are run and the parameters used, when applicable.
 * Fill in some missing documentation
 * Make smoothing parameters adjustable

In [12]:
# Here is a prototype function for displaying plots of the processed data using a slider
# Could be more useful to have a drop down or an option to enter the index of a sample in a box

import h5py
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed
from pandas import read_hdf
import numpy as np

def view_eems(database_name):
    """Browse the processed EEMs in an h5 database with an integer slider.

    Reads the 'File_Name' column of the 'meta' table for plot titles, prints
    the number of samples, and builds an ipywidgets ``interact`` control whose
    slider ``i`` selects which entry of the 'eems' dataset is plotted.

    Parameters
    ----------
    database_name : str
        Path of the h5 file holding the 'meta' table and the 'eems' dataset.

    Returns
    -------
    None
    """
    meta_table = read_hdf(database_name, 'meta')
    file_names = np.array(meta_table['File_Name'])
    n_samples = file_names.shape[0]

    def _draw_sample(database_name, i):
        """Draw a filled contour plot of sample ``i``, titled with its file name."""
        plt.figure(figsize=(4, 3))
        with h5py.File(database_name, 'r') as h5_file:
            sample = h5_file['eems'][i]
        # Row 0 and column 0 hold the coordinate vectors passed to contourf;
        # the remaining block is the plotted surface.
        # Presumably these are excitation / emission wavelengths -- confirm
        # against the array layout pyeem writes.
        ex_axis = sample[0, 1:]
        em_axis = sample[1:, 0]
        fluorescence = sample[1:, 1:]
        plt.contourf(ex_axis, em_axis, fluorescence)
        plt.colorbar()
        plt.title(file_names[i])

    print(n_samples)
    # database_name is pinned with fixed(); only i is exposed as a slider.
    interact(_draw_sample, database_name=fixed(database_name), i=(0, n_samples - 1))

# Open the viewer on the database created earlier. The name must match the case
# used at creation ("EEM_data.h5"); 'EEM_Data.h5' only resolves on
# case-insensitive filesystems.
view_eems("EEM_data.h5")
    

4


interactive(children=(IntSlider(value=1, description='i', max=3), Output()), _dom_classes=('widget-interact',)…

In [13]:
# I'd like to create or find some sort of summary function that can display what keys are available in the H5 file
# Here is something that lists the keys, but not additional info.
import h5py
# Print the name of every top-level key in the database, left-justified to 20
# characters. The file name must match the case used when the database was
# created ("EEM_data.h5"); 'EEM_Data.h5' only resolves on case-insensitive
# filesystems. The unused lookup of each key's object has been dropped.
with h5py.File("EEM_data.h5", 'r') as f:
    for key in f.keys():
        print(key.ljust(20, ' '))

blanks_subtracted   
eems                
eems_cropped        
eems_ru             
eems_smooth         
excised_values      
meta                
raw_eems            
scatter_removed     
