## SEC analysis
### Put this file in the same folder as the SEC data (the folder must contain exactly one CSV file)
### Run all the cells together every time you want to analyze a new file
### The final output is a picture file (.jpg) with the same name as the input file


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import io
import chardet
import os
import glob

In [None]:
# Locate the input file: the current folder must hold exactly one CSV file.
# Sorting only makes the error message below deterministic.
csv_files = sorted(glob.glob('*.csv'))
if not csv_files:
    raise FileNotFoundError(
        'No CSV file found in the current folder. Please check folder contents'
    )
if len(csv_files) > 1:
    # Same exception type as the "missing" case so existing handling keeps
    # working, but the message says which files caused the ambiguity.
    raise FileNotFoundError(
        f'No unique CSV file found: {len(csv_files)} candidates {csv_files}. '
        'Please check folder contents'
    )
file_path = csv_files[0]


In [None]:
# Detect the file encoding from a leading sample of the raw bytes
with open(file_path, 'rb') as f:
    rawdata = f.read(10000)  # Only read first 10,000 bytes to detect encoding
result = chardet.detect(rawdata)
encoding = result['encoding']
print(f"Detected file encoding: {encoding}")

# chardet may report no encoding at all; use UTF-8 explicitly in that case
# instead of letting open() fall back to the platform-dependent default.
if encoding is None:
    encoding = 'utf-8'

# Read file content
try:
    with open(file_path, 'r', encoding=encoding) as f:
        content = f.read()
except (UnicodeError, LookupError):
    # UnicodeError: the detected encoding cannot decode the file.
    # LookupError: Python has no codec under the detected name.
    # Either way, retry as UTF-8 and drop undecodable bytes (best effort).
    # Other failures (e.g. OSError) are real problems and are not hidden.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

# Replace possible null characters
content = content.replace('\x00', '')

# Read content into DataFrame (the export is parsed as tab-separated text)
df = pd.read_csv(io.StringIO(content), sep='\t')

In [None]:
# Show a short preview of the raw table: a heading line, then the head rows
print("First five lines:", df.head(), sep="\n")

# Renumber the rows from 0, discarding the previous index
df = df.reset_index(drop=True)

In [None]:
# Layout of the export: every measured trace occupies a pair of columns,
# one holding the volume (mL) and one holding the measured value.
# 'vol_col' / 'data_col' are 0-based column positions in df.
data_series = {
    'UV_280': dict(vol_col=0, data_col=1, unit='mAU'),
    'Cond': dict(vol_col=2, data_col=3, unit='mS/cm'),
    'Conc_B': dict(vol_col=4, data_col=5, unit='%'),
    'UV_280_CUT': dict(vol_col=12, data_col=13, unit='mAU'),
}

# The first two rows of df are header rows; the measurements start at row 2.
measurement_rows = df.iloc[2:]

# Collect "<name>_Volume" and "<name>" columns for every trace, converting
# the cells to numbers (anything non-numeric becomes NaN).
numeric_columns = {}
for name, info in data_series.items():
    numeric_columns[f'{name}_Volume'] = pd.to_numeric(
        measurement_rows.iloc[:, info['vol_col']], errors='coerce'
    )
    numeric_columns[name] = pd.to_numeric(
        measurement_rows.iloc[:, info['data_col']], errors='coerce'
    )

# Assemble the table, drop rows in which every value is NaN, renumber rows.
processed_data = (
    pd.DataFrame(numeric_columns)
    .dropna(how='all')
    .reset_index(drop=True)
)

# Optional: to restrict the analysed range, filter processed_data here on
# 'UV_280_Volume' (for example keep only rows with a volume >= 60 mL).

print("\nProcessed data:")
print(processed_data.head())

In [None]:
# Fraction table: column 10 holds the fraction volume, column 11 its label
# (the first two rows of df are skipped, as for the measurement columns).
fraction_df = pd.DataFrame({
    'Frac_Volume': pd.to_numeric(df.iloc[2:, 10], errors='coerce'),
    'Frac': df.iloc[2:, 11],
})

# Keep only rows that carry a label: neither missing nor an empty string
has_label = fraction_df['Frac'].notna() & (fraction_df['Frac'] != '')
fraction_df = fraction_df[has_label].reset_index(drop=True)

print("fraction_df:")
fraction_df

In [None]:
# Display the processed table as the output of this notebook cell
processed_data

In [None]:
# Global plot styling (applies to every figure created afterwards)
plt.rcParams['font.family'] = 'Times New Roman'  # Font type
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['axes.labelweight'] = 'bold'        # Bold axis labels
plt.rcParams['font.size'] = 20                   # Default font size
plt.rcParams['lines.linewidth'] = 3.0            # Thicker lines
# Title, axis-label, tick-label and legend font sizes can be overridden here
# in the same way (axes.titlesize, axes.labelsize, xtick.labelsize,
# ytick.labelsize, legend.fontsize).

# Overview figure: UV 280 trace together with the UV 280 "cut" trace
fig, ax1 = plt.subplots(figsize=(12, 8))

for trace, trace_color, trace_label in (
    ('UV_280', 'blue', 'UV 280 (mAU)'),
    ('UV_280_CUT', 'yellow', 'UV 280 Cut (mAU)'),
):
    ax1.plot(processed_data[f'{trace}_Volume'], processed_data[trace],
             color=trace_color, label=trace_label)

# Axis labels and tick label colour
ax1.set_xlabel('Volume (mL)')
ax1.set_ylabel('UV 280 (mAU)', color='black')
ax1.tick_params(axis='y', labelcolor='black')

# Legend built from the labelled lines on this axis
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles, labels, loc='upper right')

# Horizontal grid lines only
ax1.grid(axis='y', linestyle='--', alpha=0.7)
ax1.grid(axis='x', visible=False)

# Show figure
plt.show()

In [None]:
# Subtract the UV_280_CUT signal from the UV_280 signal, row by row, and
# store the result back in the 'UV_280' column.
# NOTE: this overwrites 'UV_280' in place, so running this cell again
# without re-running the earlier cells subtracts the cut signal a second time.
processed_data['UV_280'] = processed_data['UV_280'].sub(processed_data['UV_280_CUT'])

In [None]:
# Display the table again, now with the updated 'UV_280' column
processed_data

In [None]:
from matplotlib.lines import Line2D

# Final figure: UV 280 on the left axis, conductivity on the right axis,
# fraction markers along the bottom; saved as a JPG next to the input file.

# Create figure and left axis
fig, ax1 = plt.subplots(figsize=(12, 8))

# Plot UV 280 (left Y-axis)
color = 'blue'
ax1.set_xlabel('Volume (mL)')
ax1.set_ylabel('UV 280 (mAU)', color='black')
ax1.plot(processed_data['UV_280_Volume'], processed_data['UV_280'],
         color=color, label='UV 280 (mAU)')
ax1.tick_params(axis='y', labelcolor='black')

# Create right Y-axis and plot conductivity
ax2 = ax1.twinx()
color2 = 'orange'
ax2.set_ylabel('Conductivity (mS/cm)', color='black')
ax2.plot(processed_data['Cond_Volume'], processed_data['Cond'],
         color=color2, label='Conductivity (mS/cm)')
ax2.tick_params(axis='y', labelcolor='black')

# Fraction collection markers.
# Rows whose volume could not be parsed (NaN) have no position on the x-axis
# and are skipped.
fractions = fraction_df.dropna(subset=['Frac_Volume'])

# 1. Short vertical ticks just above the bottom edge of the plot area
#    (ymin/ymax are fractions of the axes height).
for vol in fractions['Frac_Volume']:
    ax1.axvline(x=vol, color='darkgreen', linestyle='-', alpha=0.8,
                ymin=0.005, ymax=0.01, clip_on=False)

# 2. Fraction labels, rotated 90°. Only labels ending in '.1' or '.5' and the
#    'Waste(Frac)' marker are drawn, to avoid overlapping text.
for vol, label in zip(fractions['Frac_Volume'], fractions['Frac']):
    label = str(label)  # tolerate labels that were not read as text
    if label.endswith(('.1', '.5')) or label == 'Waste(Frac)':
        # x is in data units, y is a fraction of the axes height; the top of
        # the text is anchored at 10% of the axes height.
        ax1.text(vol, 0.1, label,
                 transform=ax1.get_xaxis_transform(),
                 rotation=90,
                 verticalalignment='top',
                 horizontalalignment='center',
                 fontsize=10,
                 color='darkgreen',
                 bbox=dict(facecolor='white', alpha=0, pad=1, edgecolor='none', boxstyle='round'))

# Adjust bottom margin to leave space for fraction labels
plt.subplots_adjust(bottom=0.15)

# Legend entry for the fraction markers: the ticks themselves carry no label,
# so a stand-in line is added to the legend explicitly.
fraction_handle = Line2D([0], [0], color='darkgreen', linestyle='-', linewidth=1)

# Combine legends: handles and labels from both axes plus the fraction marker
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2 + [fraction_handle],
           labels1 + labels2 + ['Fractions'],
           loc='upper center')

# Configure grid lines (only horizontal)
ax1.grid(axis='y', linestyle='--', alpha=0.7)
ax1.grid(axis='x', visible=False)  # Hide vertical grid lines

# Fixed UV axis window. NOTE: values above 30 mAU fall outside the visible
# range; adjust the limits if the peaks are higher.
ax1.set_ylim(bottom=-1, top=30)

# Save the figure under the input file's name with a .jpg extension.
# splitext removes only the real extension, whatever its case, and leaves
# any other ".csv" text inside the file name untouched.
output_path = os.path.splitext(file_path)[0] + '.jpg'
plt.savefig(output_path, dpi=1200, bbox_inches='tight')
plt.show()