In [29]:
# Imports 
import os
import re
import pickle

In [26]:
# Get absorption and emission file paths
def get_absorption_emission_files(directory):
    abs_files, ems_files = [], []
    for fp in os.listdir(directory):
        match = re.match(r'^(.*?)\(.*\)(.*)$', fp)
        if match:
            compound_solvent, file_type = match.groups()
            if file_type == '.abs.txt':
                abs_files.append((compound_solvent, fp))
            elif file_type == '.ems.txt':
                ems_files.append((compound_solvent, fp))
    return abs_files, ems_files

# Read in the text file and return wavelength and intensity
def read_txt_file(txt_file_path):
    wavelengths, intensities = [], []
    with open(txt_file_path, 'r') as file:
        lines = file.readlines()
        for i, line in enumerate(lines):
            if i > 0:
                line_split = line.split()
                if len(line_split) > 0:
                    wavelengths.append(float(line_split[0]))
                    intensities.append(float(line_split[1]))
    return wavelengths, intensities

def match_absorption_emission(directory, abs_files, ems_files):
    ems_dict = dict(ems_files)
    combined = {
        key: {'absorption': val1, 'emission': ems_dict.get(key)}
        for key, val1 in abs_files if key in ems_dict}
    wavelengths_and_intensities = dict()
    for compound_key in combined.keys():
        abs_wavelength, abs_intensity = read_txt_file(os.path.join(directory, combined[compound_key]['absorption']))
        ems_wavelength, ems_intensity = read_txt_file(os.path.join(directory, combined[compound_key]['emission']))
        compound_dict = dict()
        compound_dict['absorption'] = {'wavelength':abs_wavelength, 'intensity':abs_intensity}
        compound_dict['emission'] = {'wavelength':ems_wavelength, 'intensity':ems_intensity}
        wavelengths_and_intensities[compound_key] = compound_dict
    return wavelengths_and_intensities


In [24]:
database_path = './Natural Chlorophylls/'
abs_files, ems_files = get_absorption_emission_files(database_path)

In [25]:
print(ems_files)

[('CHL197_BChl d [E,E]+[P,M], Et2O ', 'CHL197_BChl d [E,E]+[P,M], Et2O (Tamiaki, 2011b).ems.txt'), ('CHL043_Phe a, Pyr ', 'CHL043_Phe a, Pyr (Hindman, 1977).ems.txt'), ('CHL199_BChl e, Pyr ', 'CHL199_BChl e, Pyr (Niedzwiedzki, 2010).ems.txt'), ('CHL115_PyroPhebide a ME, DCM ', 'CHL115_PyroPhebide a ME, DCM (Dixon, 2005).ems.txt'), ('CHL137_Zn Phe a, C6H6 ', 'CHL137_Zn Phe a, C6H6 (Kobayashi, 2006).ems.txt'), ('CHL156_3-Et Chl e6, DMSO ', 'CHL156_3-Et Chl e6, DMSO (Uliana, 2014).ems.txt'), ('CHL026_Chl d, Pyr ', 'CHL026_Chl d, Pyr (Niedzwiedzki, 2010).ems.txt'), ('CHL273_BPhe a, Et2O ', 'CHL273_BPhe a, Et2O (Goedheer, 1966).ems.txt'), ('CHL222_Chl c2, Pyr ', 'CHL222_Chl c2, Pyr (Niedzwiedzki, 2010).ems.txt'), ('CHL029_Ch f, C6H6 ', 'CHL029_Ch f, C6H6 (Kobayashi, 2016).ems.txt'), ('CHL201_BChl e [E,E], THF ', 'CHL201_BChl e [E,E], THF (Kinoshita, 2019).ems.txt'), ('CHL271_BChl g, Pyr ', 'CHL271_BChl g, Pyr (Niedzwiedzki, 2010).ems.txt'), ('CHL230_Chl C2 phytyl, THF ', 'CHL230_Chl C2 phyt

In [27]:
waves_n_intensities = match_absorption_emission(database_path, abs_files, ems_files)

In [28]:
print(len(waves_n_intensities))

66


In [30]:
# Save this as a pickle file
pickle.dump(waves_n_intensities, open('./natural_chlorophyll_spectra.pkl', 'wb'))