In [None]:
#Libraries

In [1]:
import os
import pandas as pd
import numpy as np
from natsort import natsorted
import joblib
from tensorflow.keras.models import save_model, load_model
from sklearn.preprocessing import LabelEncoder

In [None]:
#Path

In [12]:
# Folder scanned for .jcm files by clean_str_ext(); "*" is a placeholder that must be replaced with a real path before running.
directory_path = "*" #Select a path

In [None]:
#############################################################################################

In [None]:
#Functions

In [3]:

def _strip_end_markers(file_path):
    """Remove every line containing "##END=" from file_path; return True if the file was rewritten."""
    with open(file_path, 'r') as f:
        lines = f.readlines()
    kept_lines = [line for line in lines if "##END=" not in line]
    # Only touch the file when something was actually removed, so files that
    # are already clean are not rewritten (and keep their modification time).
    if len(kept_lines) == len(lines):
        return False
    with open(file_path, 'w') as f:
        f.writelines(kept_lines)
    return True


def _read_data_lines(file_path):
    """Return the lines of file_path, skipping blank (whitespace-only) lines."""
    with open(file_path, 'r') as f:
        return [line for line in f if line.strip()]


def clean_str_ext(directory_path):
    """Clean the .jcm files in a directory and collect them into one DataFrame.

    Steps:
      1. Every ``.jcm`` file in ``directory_path`` that contains a line with
         "##END=" is rewritten ON DISK without those lines.
      2. For each file (natural-sorted by file name) one row is built: for
         every non-blank line, the 4th comma-separated field is taken, its
         first whitespace-delimited token is kept and prefixed with "0.".
         The values are left as strings.
      3. Columns are renamed positionally using the first comma-separated
         field (converted to float) of each line of the first file.

    Parameters
    ----------
    directory_path : str
        Directory containing the ``.jcm`` files.

    Returns
    -------
    pandas.DataFrame
        One row per ``.jcm`` file. Files with fewer lines than the longest one
        are padded with NaN; columns beyond the first file's line count keep
        their integer position as label.

    Raises
    ------
    IndexError
        If the directory contains no ``.jcm`` file (same exception type as
        before, now with an explicit message), or if a non-blank line has
        fewer than four comma-separated fields.
    ValueError
        If a header field of the first file cannot be converted to float.
    """
    jcm_files = natsorted(
        [file for file in os.listdir(directory_path) if file.endswith(".jcm")]
    )
    if not jcm_files:
        raise IndexError("No .jcm files found in " + repr(directory_path))

    ##CLEANING##
    for file_name in jcm_files:
        _strip_end_markers(os.path.join(directory_path, file_name))
    print("Done removing ##END= from files.")

    ##EXTRACTION##
    row_frames = []
    header_lines = None
    for file_name in jcm_files:
        data_lines = _read_data_lines(os.path.join(directory_path, file_name))
        if header_lines is None:
            # The first (natural-sorted) file also provides the column headers,
            # so keep its lines instead of reading the file a second time.
            header_lines = data_lines
        values = ["0." + line.strip().split(',')[3].split()[0] for line in data_lines]
        row_frames.append(pd.DataFrame([values]))
    # A single concat keeps the label-based alignment (NaN padding for shorter
    # files) without re-copying the accumulated frame on every iteration.
    final_df = pd.concat(row_frames, ignore_index=True)

    ##HEADERS##
    headers = {
        position: float(line.split(',')[0])
        for position, line in enumerate(header_lines)
    }
    final_df = final_df.rename(columns=headers)
    print('Headers extracted from '+directory_path+'/'+jcm_files[0])
    print("dataframe created.")
    return final_df

# Derivatives
def deriv(non_spectral_df, spectral_df):
    """Append 1st and 2nd numerical derivatives of every spectral column.

    For each column of ``spectral_df`` two new columns are produced,
    ``"<col>_1st_derivative"`` and ``"<col>_2nd_derivative"``, computed with
    ``np.gradient`` (the second is the gradient of the first).

    Parameters
    ----------
    non_spectral_df : pandas.DataFrame
        Columns placed first in the result.
    spectral_df : pandas.DataFrame
        Numeric columns to differentiate; placed after ``non_spectral_df``.

    Returns
    -------
    pandas.DataFrame
        ``non_spectral_df``, ``spectral_df`` and the derivative columns
        concatenated side by side (aligned on the index).

    Raises
    ------
    ValueError
        From ``np.gradient`` when ``spectral_df`` has columns but fewer than
        two rows.
    """
    derivative_columns = {}
    for col_name in spectral_df.columns:
        # NOTE(review): the gradient is taken down each column, i.e. between
        # consecutive ROWS of spectral_df, not along a row — confirm this is
        # the intended differentiation axis.
        first_derivative = np.gradient(spectral_df[col_name].values)
        derivative_columns[str(col_name) + '_1st_derivative'] = first_derivative
        derivative_columns[str(col_name) + '_2nd_derivative'] = np.gradient(first_derivative)

    # Build the frame in one go (no column-by-column insertion) and reuse the
    # spectral index so the axis=1 concat aligns row for row instead of
    # producing NaN-padded rows when spectral_df has a non-default index.
    derivatives_df = pd.DataFrame(derivative_columns, index=spectral_df.index)

    result_df = pd.concat([non_spectral_df, spectral_df, derivatives_df], axis=1)
    print('1st and 2nd derivatives calculated.')
    return result_df
    


In [None]:
#Application

In [None]:
# Build the spectral matrix from the .jcm files and coerce every column to numeric.
spectral_df = clean_str_ext(directory_path).apply(pd.to_numeric)

# Load the non-spectral matrix (i.e., instrumental params, etc.).
# '*.csv' is a placeholder: replace it with the directory and 'matrix_name.csv'.
# NOTE(review): decimal=',' is used together with read_csv's default comma
# separator — confirm the file's delimiter/quoting matches.
non_spectral_df = pd.read_csv('*.csv', decimal=',').apply(pd.to_numeric)

# Join the non-spectral columns, the spectra and their 1st/2nd derivatives.
final_df = deriv(non_spectral_df, spectral_df)

In [None]:
#saving the matrix

In [None]:
# Write the combined matrix to 'final_df.csv' (relative path, so it lands in the
# current working directory); index=False leaves the row index out of the file.
final_df.to_csv('final_df.csv', index=False)