## Author: Vinicio Soto, CICIMA
### This script calculates absortptance, reflectance and transmittance if you provide both reflectance and transflectance data, works with .txt files 
### Filenames should be in the format COLLECTION0123-1.txt. Files should have L1050 metadata header and contents should be jump corrected and averaged.

In [1]:
#Dependencies

import pandas as pd
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime


import itertools

In [2]:
#constants
markersize = 2

### Data paths

In [10]:
#workplace = "WFH"
workplace = "CICIMA"
#workplace = "colaboratory"

#patterns and samples

if workplace == "WFH": 
    #transflectance averages
    transflectance_data_path = r"E:\CICIMA\2023_Q4_TRANSMITTANCE_AND_TRANSFLECTANCE\2023_Q4_TRANSMITTANCE_AND_TRANSFLECTANCE\2023NOV_TRANSFLECTANCE\ASC\new_names\average"
    transmittance_data_path = r"E:\CICIMA\2023_Q4_TRANSMITTANCE_AND_TRANSFLECTANCE\2023_Q4_TRANSMITTANCE_AND_TRANSFLECTANCE\2023NOV_TRANSMITTANCE\ASC\new_name\average"
    #reports
    root_path = r"E:\CICIMA\Estudio Optico Escarabajos"
    #code name samples path
    code_name_path = r"E:\CICIMA\Estudio Optico Escarabajos\CODE NAME SAMPLES 2024.txt"
    
if workplace == "CICIMA":
    #transflectance averages
    transflectance_data_path = r"C:\Users\EstebanSoto\Downloads\VINICIO\VINICIO\2023-11-TRANSFLECTANCE_AND_TRANSMITTANCE\new names\transflectance\Modificado\ASC\average"
    transmittance_data_path = r"C:\Users\EstebanSoto\Downloads\VINICIO\VINICIO\2023-11-TRANSFLECTANCE_AND_TRANSMITTANCE\new names\transmittance\Modificado\ASC\average"
    #reports
    root_path = r"C:\Users\EstebanSoto\Documents\Estudio Optico Escarabajos"
    #code name samples path
    code_name_path = r"C:\Users\EstebanSoto\Documents\Estudio Optico Escarabajos\CODE NAME SAMPLES 2024.txt"
    


#report path
current_date = datetime.now().date()

save_location = os.path.join(root_path, "data_analysis", f"TRA_{current_date}" , r"optical_info")
report_location = os.path.join(save_location, "report")


#absorptance and reflectance save path
save_path = os.path.join(save_location , r"TRA_data")




In [11]:
#Create folders 
folders_to_create = [save_location, report_location, save_path]
for folder in folders_to_create:
    try:
        os.makedirs(folder)
    except Exception as e: 
        print(e)

[WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\EstebanSoto\\Documents\\Estudio Optico Escarabajos\\data_analysis\\TRA_2024-04-11\\optical_info'
[WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\EstebanSoto\\Documents\\Estudio Optico Escarabajos\\data_analysis\\TRA_2024-04-11\\optical_info\\report'
[WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\EstebanSoto\\Documents\\Estudio Optico Escarabajos\\data_analysis\\TRA_2024-04-11\\optical_info\\TRA_data'


#### Functions

In [12]:
def get_metadata_and_dataframe(file_location):
     #definitions
    #Logic to read ASCII data
    import os
    import pandas as pd
    import re
    
    def get_sample_code_from_filename(row_str, file_location):
        print("string")
        print(file_location)
        filename = os.path.basename(file_location)
        re1 = r"([a-zA-Z\d]+)(?:-\d)*(?:.Sample)*.(?:txt)*(?:ASC)*"
        #Names are in the form CODE-MEASUREMENTNUMBER.TXT
        p = re.compile(re1)
        m = p.match(filename)
        print(f"match filename: {m}")
        if m:
            print(f"group 1: {m.group(1)}")
            return(m.group(1))
        return get_sample_code(file_str)

    def get_sample_code(row_str):
        #Tries to get the sample code from the file, if it does not match
        #it tries to get it from the filename. 
        print("string")
        print(row_str)
        re1 = r"([a-zA-Z\d]+)(?:-\d)*(?:.Sample)*.(?:txt)*(?:ASC)*"
        #Names are in the form CODE-MEASUREMENTNUMBER.TXT
        p = re.compile(re1)
        m = p.match(row_str)
        print(f"match: {m}")
        if m:
            return(m.group(1))
        else: 
            ""

    def responses(str):
        re1 = "\d+/(\d+,\d+) \d+,\d+/(\d+,\d+)"
        p = re.compile(re1)
        m= p.match(str)
        if m:
            return m.group(1),m.group(2)    
        else:
            return "",""
    def attenuator_settings(str):
        re1 = "S:(\d+,\d+) R:(\d+,\d+)"
        p = re.compile(re1)
        m= p.match(str)
        if m:
            return m.group(1),m.group(2)    
        else:
            return "",""
    def slit_pmt_aperture(str):
        re1 = "\d+/servo \d+,\d+/(\d+,\d+)"
        p = re.compile(re1)
        m= p.match(str)
        if m:
            return m.group(1)    
        else:
            return ""
    #Initializa metadata dict
    metadata = {}
    
    #Read header
    lines = []
    with open(file_location) as myfile:
        lines = myfile.readlines()[0:90] 
    metadata["header"] = "".join(lines)
    
    
    #read_metadata
    f = open(file_location)
    
    df = pd.DataFrame()
    with f as data_file:
        for index, row in enumerate(data_file): #0-89

            row_str = row.strip()
            if index +1 == 3: #Filename and extension
                metadata["filename"]= row_str
                metadata["code"] = get_sample_code_from_filename(row_str, file_location)
            if index + 1 == 4: #date DD/MM/YYYY
                metadata["date"]= row_str
            if index + 1 == 5:#Time HH:MM:SS.SS
                metadata["time"]= row_str
            if index + 1 == 8:#user
                metadata["user"]= row_str
            if index + 1 == 9:#description
                metadata["description"]= row_str
            if index + 1 == 10:#minimum wavelength
                metadata["minimum_wavelength"]= row_str
            if index + 1 == 12:#equipment name
                metadata["equipment"]= row_str
            if index + 1 == 13:#equipment series
                metadata["series"]= row_str
            if index + 1 == 14:#data visualizer version, equipment version, date and time
                metadata["software"]= row_str
            if index + 1 == 21:#Operating mode
                metadata["operating_mode"]= row_str
            if index + 1 == 22: #Number of cycles
                metadata["cycles"]= row_str
            if index + 1 == 32: #range/servo
                metadata["slit_pmt"]= slit_pmt_aperture(row_str)
            if index + 1 == 33:
                metadata["response_ingaas"], metadata["response_pmt"]= responses(row_str)
            if index + 1 == 35: #pmt gain, if 0 is automatic
                metadata["pmt_gain"]= row_str
            if index + 1 == 36: #InGaAs detector gain
                metadata["ingaas_gain"]= row_str
            if index + 1 == 42:#monochromator wavelength nm
                metadata["monochromator_change"]= row_str
            if index + 1 == 43:#lamp change wavelength
                metadata["lamp_change"]= row_str
            if index + 1 == 44:#pmt wavelength
                metadata["pmt_change"]= row_str
            if index + 1 == 45:#beam selector
                metadata["beam_selector"]= row_str
            if index + 1 == 46:
                metadata["cbm"]= row_str
            if index + 1 == 47: #cbd status, on/off
                metadata["cbd_status"]= row_str
            if index + 1 == 48: #attenuator percentage
                metadata["attenuator_sample"], metadata["attenuator_reference"]= attenuator_settings(row_str)
            if index + 1 == 49:
                metadata["polarizer"]= row_str
            if index + 1 == 80:
                metadata["units"]= row_str
            if index + 1 == 81:
                metadata["measuring_mode"]= row_str
            if index + 1 == 84:
                metadata["maximum_wavelength"]= row_str
            if index + 1 == 85:
                metadata["step"]= row_str
            if index + 1 == 86:
                metadata["number_of_datapoints"]= row_str
            if index + 1 == 88:
                metadata["maximum_measurement"]= row_str
            if index + 1 == 89:
                metadata["minimum_measurement"]= row_str
            if index +1 == 90:
                break
        df = pd.read_csv(f, sep="\t", decimal =".", names=["wavelength", metadata["measuring_mode"]]).dropna()
        df["wavelength"],df[metadata["measuring_mode"]] = df["wavelength"].astype(float), df[metadata["measuring_mode"]].astype(float)
        return metadata, df

In [13]:
#Create folder if not exists
def create_folder_if_not_exists(folder_path):
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Folder '{folder_path}' created successfully.")
    else:
        print(f"Folder '{folder_path}' already exists.")
        
def filter_substring_elements(path_strings, substring):
    filtered_paths = [path for path in path_strings if substring in path]
    return filtered_paths

###### Markdown and LaTeX

In [14]:
### List files in folder
import os

def list_files_in_folder(folder_path):
    files_only_in_folder = []
    
    # Get the list of all files in the folder and its subfolders
    all_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            all_files.append(os.path.join(root, file))
    
    # Get the list of files that are only in the top-level folder
    for file in all_files:
        if os.path.isfile(file) and os.path.dirname(file) == folder_path:
            files_only_in_folder.append(file)
    
    return files_only_in_folder

transflectance_file_list_general = list_files_in_folder(transflectance_data_path)
transmittance_file_list_general = list_files_in_folder(transmittance_data_path)
#std_dev_file_list
#transmittance_file_list_general

### Dataframe List

In [15]:
transmittance_dataframes = []
absorptance_dataframes = []
reflectance_dataframes = []
transflectance_dataframes = []

### Absorptance

In [16]:
#read codes
codes_df = pd.read_csv(code_name_path, sep="	",header=0,  
                     names=["code", "description"]) 
codes_df

Unnamed: 0,code,description
CICIMAUCR0105,CHRYSINA KALININI LA AMISTAD 2022,C. kalinini La Amistad AM V.2022
CICIMAUCR0104,C. RESPLENDENS MV 23-19,MV.V.23 02 02-VI-2023 125. C. resplendens.
CICIMAUCR0158,C. CUPREOMARGINATA MV 23-15,C. cupreomarginata MV 23-15
INBUCR0431,C. AURIGANS INBIO DESCABEZADO,
INBUCR0216,C. CHRYSARGYREA INBIOCRI002426713,
CICIMAUCR0044,C. OPTIMA #81 7 SIGNO PREGUNTA,
CICIMAUCR0163,C. CHRYSARGYREA #73 MV23,
CICIMAUCR0173,C. CHRYSARGYREA #84 MV23,
INBUCR0112,STRIGIDIA BELTI INBIO0004211063,
INBUCR0114,STRIGIDIA GLABRA INBIOCRI0003420620,


### Reflectance

In [18]:
for code in codes_df["code"]:
    transflectance_file_list_filtered = filter_substring_elements(transflectance_file_list_general, code)
    transmittance_file_list_filtered = filter_substring_elements(transmittance_file_list_general, code)
    #print("len")
    #print(len(transflectance_file_list_filtered))
    #print(len(transmittance_file_list_filtered))
    for file1 in transflectance_file_list_filtered:
        for file2 in transmittance_file_list_filtered:
                
                print(file1)
                print(file2)
                print("*****")
            #declare transflectance dataframe
                transflectance_metadata , transflectance_df = get_metadata_and_dataframe(file1)
                print(f"metadata tx: {transflectance_metadata}")
                transflectance_measuring_mode = transflectance_metadata["measuring_mode"]
                #transflectance_df = pd.read_csv(file1, sep="	",header=None, names=["wavelength", "transflectance"]).dropna()
                #transflectance_df["wavelength"],transflectance_df["transflectance"] = transflectance_df["wavelength"].astype(float),transflectance_df["transflectance"].astype(float)
                transflectance_df = transflectance_df[transflectance_df['wavelength'] <= 2000]
                #ALL points over 100 will be set to 100
                #TODO: Create a normalization method.
                transflectance_df.loc[transflectance_df[transflectance_measuring_mode] > 100, transflectance_measuring_mode] = 100
                transflectance_dataframes.append(transflectance_df)
                
                #add code, genus and species
                transflectance_df["code"] = code
                transflectance_df["species"] = transflectance_metadata["species"]
                transflectance_df["genus"] = transflectance_metadata["genus"]
                    
                add_info(transflectance_df, code, transflectance_metadata)
                #transflectance_df["code"] = code
            #declare transmittance dataframe
                transmittance_metadata ,transmittance_df= get_metadata_and_dataframe(file2)
                transmittance_measuring_mode = transmittance_metadata["measuring_mode"]
                #transmittance_df  = pd.read_csv(file2, sep="	",header=None, names=["wavelength", "transmittance"]).dropna()
                #transmittance_df["wavelength"],transmittance_df["transmittance"] = transmittance_df["wavelength"].astype(float),transmittance_df["transmittance"].astype(float)
                transmittance_df = transmittance_df[transmittance_df['wavelength'] <= 2000]
                #ALL points over 100 will be set to 100
                #TODO: Create a normalization method. 
                transmittance_df.loc[transmittance_df[transmittance_measuring_mode] > 100, transmittance_measuring_mode] = 100
                transmittance_df["code"] = code
                transmittance_dataframes.append(transmittance_df)

                #add code, genus and species
                transmittance_df["code"] = code
                # transmittance_df["species"] = transmittance_df_metadata["species"]
                # transmittance_df["genus"] = transmittance_df_metadata["genus"]
                #debug
                #print(transflectance_df)
                #print(transmittance_df)
            
                
                
                #print("merged")
                #print(merged_df)
                
                
                
                #Calculate absorptance
                absorptance_df = pd.DataFrame([])
                absorptance_df["wavelength"] = transflectance_df["wavelength"]
                absorptance_df["%A"] = 100.0 - transflectance_df[transflectance_measuring_mode]
                absorptance_df["code"]= code
                absorptance_dataframes.append(absorptance_df)
                #add code, genus and species
                # absorptance_df["code"] = code
                absorptance_df["species"] = transflectance_metadata["species"]
                absorptance_df["genus"] = transflectance_metadata["genus"]
                #print("absorptance")
                #print(absorptance_df)
                
                # Create a new DataFrame with 'Wavelength' and 'Subtracted_Value' columns
                # Subtract 'transmittance' from 'Transflectance' and create a new column 'Subtracted_Value'
                
                 # Merge the two DataFrames on 'Wavelength' column
                merged_df = pd.merge(transflectance_df, transmittance_df, on='wavelength')
                merged_df['%R'] = merged_df[transflectance_measuring_mode] - merged_df[transmittance_measuring_mode]
                print(merged_df)
                reflectance_df = merged_df[['wavelength', '%R']]
                reflectance_df.file = file1
                reflectance_df["code"] = code
                #print("reflectance.file")
                #print(reflectance_df.file)
                reflectance_dataframes.append(reflectance_df)
                #add code, genus and species
                reflectance_df["code"] = code
                # reflectance_df["species"] = transflectance_metadata["species"]
                # reflectance_df["genus"] = transflectance_metadata["genus"]
                
                reflectance_df.plot(x='wavelength', y='%R', kind='scatter', s=markersize, title = code + " mean and moving avg", figsize = (8, 6))
                #plt.show()
                #print(reflectance_df)

                #define function that deletes whitespace in saved file
                def delete_blank_row(archive_name):
                    with open(archive_name, "r") as f:
                        data = f.read()
                    with open(archive_name, "w") as f:
                        f.write(data.replace("#DATA\n","#DATA"))

                
                #saves files
                def save_reflectance():
                    corrected_path = save_path +"\\" + "reflectance\\"

                    if not os.path.exists(corrected_path):
                        os.mkdir(corrected_path)
    
                    new_filename = code +".txt"
                    reflectance_numpy = reflectance_df.drop(columns=["code"]).to_numpy()
                    new_archive_name = corrected_path + new_filename
                    #np.savetxt(new_archive_name, reflectance_numpy,fmt='%s', delimiter="\t")
                    print(f"type %r: {type(reflectance_df)}")
                    print(f"df \n {reflectance_df}")
                    print(f"reflectance numpy \n {reflectance_numpy}")
                    np.savetxt(new_archive_name, reflectance_numpy,fmt = "%.6f", delimiter="\t", header=transmittance_metadata["header"].replace("%T","%R"), comments='')
                    delete_blank_row(new_archive_name)
                    
                #reflectance 
                
                save_reflectance()
                
                #transmittance 
                def save_transmittance():
                    corrected_path = save_path +"\\" + "transmittance\\"
    
                    if not os.path.exists(corrected_path):
                        os.mkdir(corrected_path)
    
                    new_filename = code +".txt"
                    transmittance_numpy = transmittance_df.drop(columns=["code"]).to_numpy()
                    new_archive_name = corrected_path + new_filename
                    # np.savetxt(new_archive_name, transmittance_numpy,fmt='%s', delimiter="\t")
                    #todo
                    np.savetxt(new_archive_name, transmittance_numpy,fmt = "%.6f", delimiter="\t", header=transmittance_metadata["header"], comments='')
                    delete_blank_row(new_archive_name)
                                    
                save_transmittance()
                #absorptance 

                def save_absorptance():
                    corrected_path = save_path +"\\" + "absorptance\\"
    
                    if not os.path.exists(corrected_path):
                        os.mkdir(corrected_path)
    
                    new_filename = code +".txt"
                    absorptance_numpy = absorptance_df.drop(columns=["code"]).to_numpy()
                    new_archive_name = corrected_path + new_filename
                    #np.savetxt(new_archive_name, absorptance_numpy,fmt='%s', delimiter="\t")
                    np.savetxt(new_archive_name, absorptance_numpy,fmt = "%.6f", delimiter="\t", header=transmittance_metadata["header"].replace("%T","%A"), comments='')
                    delete_blank_row(new_archive_name)
                    
                save_absorptance()
                
                #transflectance 
                def save_transflectance():
                    corrected_path = save_path +"\\" + "transflectance\\"
    
                    if not os.path.exists(corrected_path):
                        os.mkdir(corrected_path)
    
                    new_filename = code +".txt"
                    transflectance_numpy = transflectance_df.drop(columns=["code"]).to_numpy()
                    new_archive_name = corrected_path + new_filename
                    #np.savetxt(new_archive_name, transflectance_numpy,fmt='%s', delimiter="\t")
                    np.savetxt(new_archive_name, transflectance_numpy,fmt = "%.6f", delimiter="\t", header=transflectance_metadata["header"], comments='')
                    delete_blank_row(new_archive_name)
                save_transflectance()
                
                
                #print(transflectance_df)
                #print(transmittance_df)
                #print(reflectance_df)
                #print(absorptance_df)

            

In [19]:
absorptance_dataframes

[]

In [21]:
create_folder_if_not_exists(report_location)
counterA = 1
counterB = 0

optical_info_report_path = os.path.join(report_location + "\\" + 'optical info.pdf')

with matplotlib.backends.backend_pdf.PdfPages(optical_info_report_path) as pdf:
    counterB = 0
    
    for dataframe in absorptance_dataframes:
        #print(dataframe)
        counterB = counterB +1
        avg_plot = dataframe.plot(x='wavelength', y='%A', kind='scatter', s=markersize, title = f"Figure {counterA}.{counterB} " + dataframe["code"].iloc[0] + " ", figsize = (8, 6), grid= True)
        avg_plot.set_xlabel("Wavelength [nm] ")
        avg_plot.set_ylabel("absorptance [%]")
        #save figures
        pdf.savefig()
        #close plot
        plt.close()
    
    counterA = 2
    counterB = 0
    
    for dataframe in transmittance_dataframes:
        counterB = counterB +1
        #print(dataframe["code"])
        #print(dataframe.file)
        avg_plot = dataframe.plot(x='wavelength', y='%T', kind='scatter', s=markersize, title = f"Figure {counterA}.{counterB} " + (dataframe["code"].iloc[0]) + " ", figsize = (8, 6), grid= True)
        avg_plot.set_xlabel("Wavelength [nm] ")
        avg_plot.set_ylabel("transmittance [%]")
        #save figures
        pdf.savefig()
        #close plot
        plt.close()
    
    counterA = 3
    counterB = 0
    
    for dataframe in reflectance_dataframes:
        counterB = counterB +1
        #print(dataframe.file)
        avg_plot = dataframe.plot(x='wavelength', y='%R', kind='scatter', s=markersize, title = f"Figure {counterA}.{counterB} " + (dataframe["code"].iloc[0]) + " ", figsize = (8, 6), grid= True)
        avg_plot.set_xlabel("Wavelength [nm] ")
        avg_plot.set_ylabel("reflectance [%]")
        #save figures
        pdf.savefig()
        #close plot
        plt.close()
        
    counterA = 4
    counterB = 0
    
    for dataframe in transflectance_dataframes:
        counterB = counterB +1
        #print(dataframe.file)
        avg_plot = dataframe.plot(x='wavelength', y='%R', kind='scatter', s=markersize, title = f"Figure {counterA}.{counterB} " + (dataframe["code"].iloc[0]) + " ", figsize = (8, 6), grid= True)
        avg_plot.set_xlabel("Wavelength [nm] ")
        avg_plot.set_ylabel("transflectance [%]")
        #save figures
        pdf.savefig()
        #close plot
        plt.close()



Folder 'C:\Users\EstebanSoto\Documents\Estudio Optico Escarabajos\data_analysis\TRA_2024-04-11\optical_info\report' already exists.


  with matplotlib.backends.backend_pdf.PdfPages(optical_info_report_path) as pdf:
