In [1]:
# Load necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from scipy.optimize import curve_fit
from sklearn.decomposition import PCA

In [None]:
# Directory that holds the spectrum text files
data_dir = '/home/jwm/data/JASCO_FP_6500_Spectrofluorometer/2024april13_50mM_DAB2_418_630_150mM_NaCl_1DCVJ_slow_hysteresis/'

# First and last spectrum number to use in the plot
# (the original note says "-1"; presumably zero-based -- TODO confirm)
first_spectrum, last_spectrum = 0, 30

# Range of titles to load
start_title, end_title = 7, 30

# Position of the box (x, y)
box_x, box_y = 10, 300

# Titles extracted from the file names; starts out empty
titles = list()

# Function to load (x, y) data from txt file starting from the 19th line
def load_data(file_path, header_lines=18):
    """Load the numeric table stored in a spectrum text file.

    Parameters
    ----------
    file_path : str
        Path of the text file; handed unchanged to ``np.loadtxt``.
    header_lines : int, optional
        Number of leading lines to skip before the numeric data starts.
        Defaults to 18, i.e. the data is read from the 19th line onwards.

    Returns
    -------
    numpy.ndarray
        The array produced by ``np.loadtxt`` for the remaining lines.
    """
    return np.loadtxt(file_path, skiprows=header_lines)

# Get all txt files in the directory
data_files = glob.glob(data_dir + "*.txt")

# Baseline spectrum (the file whose title is 999); filled in below if present
baseline_data = None
baseline_title = None

# Load the (x, y) data from each file, in the order glob returned them
loaded_data = [load_data(file_path) for file_path in data_files]

# Use the 7th to 5th last characters of the file stem as the integer title
titles = [int(file_path.split('/')[-1].split('.')[0][-7:-4]) for file_path in data_files]

print(titles)

# Sort titles, data_files and loaded_data together in ascending title order.
# All three lists are reordered with the same indices so they stay aligned;
# no separate re-sort of data_files is done afterwards for that reason.
sorted_indices = np.argsort(titles)
titles = [titles[i] for i in sorted_indices]
data_files = [data_files[i] for i in sorted_indices]
loaded_data = [loaded_data[i] for i in sorted_indices]

# The file titled 999 is the baseline. Look it up by title so that every
# file is considered, not just the last one visited while loading.
# The baseline entry is left in the lists, as before.
if 999 in titles:
    baseline_index = titles.index(999)
    baseline_data = loaded_data[baseline_index]
    baseline_title = titles[baseline_index]

# Convert extracted digits back to temperature values
# NOTE(review): the temperature is derived from the position in the sorted
# list (10 + 0.5 per step), not from the title value, and a baseline entry
# (999) also receives a temperature -- confirm this is intended.
temperatures = [10 + i * 0.5 for i in range(len(titles))]


# Sample loaded_data structure
# loaded_data = [
#    np.array([[470.0, 422.133], [470.1, 422.08], [470.2, 422.001], [650.0, 7.49391]]),
#    np.array([[470.0, 430.678], [470.1, 430.75], [470.2, 430.611], [650.0, 7.97855]])
#]

# Function to create DataFrame
def create_dataframe(loaded_data, titles):
    """Combine several two-column spectra into one wide DataFrame.

    Every entry of ``loaded_data`` becomes a two-column frame named
    ``'wavelength'`` and the title at the same position in ``titles``.
    The frames are then outer-merged on ``'wavelength'`` one after another,
    so the result has one ``'wavelength'`` column plus one column per spectrum.
    """
    # Build one small frame per spectrum, labelled with its title
    frames = [
        pd.DataFrame(spectrum, columns=['wavelength', titles[position]])
        for position, spectrum in enumerate(loaded_data)
    ]

    # Start from the first spectrum and fold the others in, left to right
    combined = frames[0]
    for frame in frames[1:]:
        combined = combined.merge(frame, on='wavelength', how='outer')

    return combined


# Example usage
# included_titles = [int(title) for title in input("Enter the titles to include (separated by space): ").split()]
# selected_data = [loaded_data[titles.index(title)] for title in included_titles]

# Example usage
def get_selected_data(loaded_data, titles_input):
    """Pick entries of ``loaded_data`` by 1-based title.

    Parameters
    ----------
    loaded_data : sequence
        Items to choose from; title ``k`` refers to ``loaded_data[k - 1]``.
    titles_input : iterable
        Each element is either a single title (an int or a numeric string)
        or a string range ``"a-b"`` selecting titles ``a`` through ``b``
        inclusive.

    Returns
    -------
    list
        The selected items, in the order the titles were given.

    Raises
    ------
    ValueError
        If a title (or the start of a range) is smaller than 1, or a token
        cannot be parsed as an integer or an ``"a-b"`` range.
    IndexError
        If a single title is larger than ``len(loaded_data)``.
    """
    selected_data = []
    for title_input in titles_input:
        # Only strings can describe a range; an int such as -3 is a plain
        # (invalid) title, not a range with a '-' separator.
        if isinstance(title_input, str) and '-' in title_input:
            start, end = map(int, title_input.split('-'))
            if start < 1:
                raise ValueError(f"titles are 1-based, got range {title_input!r}")
            selected_data.extend(loaded_data[start - 1:end])
        else:
            title = int(title_input)
            # Without this check, 0 or a negative title would silently wrap
            # around to the end of the list through negative indexing.
            if title < 1:
                raise ValueError(f"titles are 1-based, got {title_input!r}")
            selected_data.append(loaded_data[title - 1])
    return selected_data

# Ask which titles to run the PCA on, e.g. "1 3 5-9"
title_inputs = input("Enter the titles to include (separated by space), and ranges separated by '-': ").split()

# Expand every token into integer titles; "a-b" stands for a, a+1, ..., b
included_titles = []
for title_input in title_inputs:
    if '-' in title_input:
        bounds = title_input.split('-')
        included_titles.extend(range(int(bounds[0]), int(bounds[-1]) + 1))
    else:
        included_titles.append(int(title_input))

selected_data = get_selected_data(loaded_data, included_titles)

# One 'wavelength' column plus one column per selected spectrum ...
df = create_dataframe(selected_data, included_titles)
# ... transposed, so row 0 is the wavelength axis and every later row is one spectrum
df = df.transpose()

# Extract spectra and wavelengths.
# The wavelength axis is row 0; rows 1 onwards are the spectra themselves.
# NOTE(review): the first two wavelength points are dropped from both arrays
# (column slice 2:) -- kept as in the original, reason not visible here.
spectra = df.iloc[1:, 2:].values.astype(float)  # Convert to numpy array
wavelengths = df.iloc[0, 2:].values.astype(float)

# Step 3: Perform PCA
# Initialize PCA with desired number of components

# You can adjust the 5; the value is capped by the data shape because PCA
# cannot return more components than there are spectra or wavelength points.
n_components = min(5, *spectra.shape)
pca = PCA(n_components=n_components)

# Fit PCA to the data
pca.fit(spectra)

# Get principal components and singular values
components = pca.components_
singular_values = pca.singular_values_

# Step 4: Visualize Singular Values
plt.figure(figsize=(10, 6))
plt.plot(np.arange(1, n_components + 1), singular_values, marker='o', linestyle='-')
plt.xlabel('Component')
plt.ylabel('Singular Value')
plt.title('Singular Values')
plt.grid(True)
plt.show()

# Step 5: Visualize Component Spectra
plt.figure(figsize=(10, 6))
for i in range(n_components):
    plt.plot(wavelengths, components[i], label=f'Component {i+1}')
plt.xlabel('Wavelength')
plt.ylabel('Intensity')
plt.title('Component Spectra')
plt.legend()
plt.xlim(470, 650)  # Adjust x-axis limits
plt.grid(True)
plt.show()


[16, 11, 29, 30, 17, 23, 22, 3, 12, 19, 6, 15, 28, 18, 5, 8, 25, 14, 7, 24, 4, 10, 2, 9, 27, 31, 26, 999, 20, 1, 21, 13]
