In [3]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [8]:


# Set the directory where your data files are located
data_dir = '/home/jwm/data/JASCO_FP_6500_Spectrofluorometer/2024april13_50mM_DAB2_418_630_150mM_NaCl_1DCVJ_slow_hysteresis/'

# Function to load (x, y) data from txt file starting from the 19th line
def load_data(file_path):
    return np.loadtxt(file_path, skiprows=18)

# Get all txt files in the directory
data_files = glob.glob(data_dir + "*.txt")

# Extract 6th to 4th last digits from file name and use as title
titles = [int(file_path.split('/')[-1].split('.')[0][-7:-4]) for file_path in data_files]

# Sort the titles and data_files in ascending order
sorted_indices = np.argsort(titles)
titles = [titles[i] for i in sorted_indices]
data_files = [data_files[i] for i in sorted_indices]

# Initialize an empty list to store DataFrames
dataframes = []

# Load data from each file and create DataFrames
for file_path in data_files:
    title = int(file_path.split('/')[-1].split('.')[0][-7:-4])  # Extract title from file name
    data = load_data(file_path)
    df = pd.DataFrame(data, columns=['X', 'Y'])  # Assuming the columns are X and Y
    dataframes.append(df)

# Create a MultiIndex from titles and concatenate DataFrames
df_combined = pd.concat(dataframes, keys=titles)

# Reset index to remove MultiIndex and rename level 0 to 'Title'
df_combined.reset_index(level=0, inplace=True)
df_combined.rename(columns={'level_0': 'Title'}, inplace=True)

# Print the resulting DataFrame
print(df_combined)

# Perform PCA
pca = PCA(n_components=5)  # Number of components to keep
pca.fit(df_combined.iloc[2:, 3:])  # Fit PCA on x, y columns (assuming x is in column 1 and y is in column 2)

# Get the top 5 principal components and their singular values
top_components = pca.components_
singular_values = pca.singular_values_

# Reconstruct the spectra using the top 5 components and singular values
reconstructed_spectra = pd.DataFrame(top_components.T @ singular_values, index=df.index, columns=['Reconstructed_y'])

# Calculate the residual spectra
residual_spectra = df.iloc[:, 2] - reconstructed_spectra['Reconstructed_y']  # Assuming y is in column 3

# Minimize the sum of squared residual spectra
minimized_residual = (residual_spectra**2).sum()

# Output the minimized residual
print(minimized_residual)








# Separate the y-values from the DataFrame
y_values = df_combined.drop(columns=['Title'])  # Assuming 'Title' is the column containing titles

# Perform PCA
pca = PCA()
pca.fit(y_values)

# Get the principal components
principal_components = pca.components_

# Get the wavelengths (assuming they are the same for all spectra)
wavelengths = df_combined.columns[1:]  # Assuming the first column is 'Title'

# Plot each principal component
plt.figure(figsize=(10, 6))
for i in range(principal_components.shape[0]):
    plt.plot(wavelengths, principal_components[i], label=f'Component {i+1}')
plt.xlabel('Wavelength')
plt.ylabel('Intensity')
plt.title('Principal Components')
plt.legend()
plt.grid(True)
plt.show()


# Plot the singular values
singular_values = pca.singular_values_
spectrum_titles = titles  # Assuming titles is a list of spectrum titles
plt.figure(figsize=(10, 6))
plt.plot(spectrum_titles, singular_values, marker='o', linestyle='-')
plt.xlabel('Spectrum Titles')
plt.ylabel('Singular Value')
plt.title('Singular Values')
plt.grid(True)
plt.xticks(rotation=45)  # Rotate x-axis labels for better readability if needed
plt.show()

      Title      X          Y
0         1  470.0  422.13300
1         1  470.1  422.08000
2         1  470.2  422.00100
3         1  470.3  422.67700
4         1  470.4  422.67400
...     ...    ...        ...
1796    999  649.6    7.01178
1797    999  649.7    7.00195
1798    999  649.8    6.96083
1799    999  649.9    6.98529
1800    999  650.0    6.98897

[57632 rows x 3 columns]


ValueError: at least one array or dtype is required

In [11]:
def print_arguments(*args):
    for arg in args:
        print(arg)
print_arguments(1,2,3,4)

1
2
3
4
