In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import io
import chardet
import os
import glob

In [None]:
# Locate the data file: the working directory must contain exactly one CSV.
csv_files = glob.glob('*.csv')
if len(csv_files) != 1:
    # Zero matches and several matches are both rejected with the same error.
    raise FileNotFoundError('未找到唯一的csv文件，请检查文件夹内容')
file_path = csv_files[0]


In [None]:
# Guess the text encoding from the first 10000 bytes of the file.
with open(file_path, 'rb') as f:
    rawdata = f.read(10000)
result = chardet.detect(rawdata)
encoding = result['encoding']
print(f"检测到的文件编码: {encoding}")

# Decode the whole file with the detected encoding. If chardet reports no
# encoding (None), open() uses the platform default instead.
try:
    with open(file_path, 'r', encoding=encoding) as f:
        content = f.read()
except (UnicodeError, LookupError) as exc:
    # Only decoding problems (bad bytes, unknown codec name) trigger the lossy
    # fallback; anything else (missing file, Ctrl-C, ...) propagates unchanged.
    # The fallback drops undecodable bytes, so say so instead of failing silently.
    print(f"使用编码 {encoding} 读取失败（{exc}），改用 utf-8 并忽略无法解码的字节")
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

# Strip NUL characters that may be left in the decoded text.
content = content.replace('\x00', '')

# Parse the tab-separated text into a DataFrame.
df = pd.read_csv(io.StringIO(content), sep='\t')

In [None]:
# Preview the first five rows of the freshly loaded table.
print("原始数据前五行:", df.head(), sep="\n")

# Renumber the rows 0..n-1, discarding the previous index.
df = df.reset_index(drop=True)

In [None]:
# Layout of the export: every curve occupies two adjacent columns, the first
# holding the volume (ml) and the second the measured value.
# Positional column indices of each curve that is extracted.
data_series = {
    'UV_280': {'vol_col': 0, 'data_col': 1, 'unit': 'mAU'},
    'Cond': {'vol_col': 2, 'data_col': 3, 'unit': 'mS/cm'},
    'Conc_B': {'vol_col': 4, 'data_col': 5, 'unit': '%'},
    'UV_280_CUT': {'vol_col': 12, 'data_col': 13, 'unit': 'mAU'}
}

# Collect "<name>_Volume" / "<name>" column pairs. The first two rows of df are
# header text and are skipped; anything non-numeric is coerced to NaN.
extracted = {}
for name, info in data_series.items():
    extracted[f'{name}_Volume'] = pd.to_numeric(df.iloc[2:, info['vol_col']], errors='coerce')
    extracted[name] = pd.to_numeric(df.iloc[2:, info['data_col']], errors='coerce')

# Assemble the frame, drop rows that are NaN in every column, renumber from 0.
processed_data = (
    pd.DataFrame(extracted)
    .dropna(how='all')
    .reset_index(drop=True)
)

# Optional steps, currently disabled:
# # subtract the cut trace: UV_280 = UV_280 - UV_280_CUT
# processed_data['UV_280'] = processed_data['UV_280'] - processed_data['UV_280_CUT']
# # keep only points with UV_280_Volume >= 60 mL (all columns retained)
# filter_condition = processed_data['UV_280_Volume'] >= 60
# processed_data = processed_data[filter_condition].copy()

print("\n处理后的数据:")
print(processed_data.head())

In [None]:
# Fraction table: column 10 holds the volume of each fraction mark and column 11
# its label (the first two rows are header text and are skipped).
fraction_df = pd.DataFrame({
    'Frac_Volume': pd.to_numeric(df.iloc[2:, 10], errors='coerce'),
    'Frac': df.iloc[2:, 11],
})

# Keep only rows that carry a label (neither missing nor an empty string),
# then renumber from 0.
has_label = fraction_df['Frac'].notna() & fraction_df['Frac'].ne('')
fraction_df = fraction_df[has_label].reset_index(drop=True)

print("fraction_df:")
fraction_df

In [None]:
# Show the processed table as this cell's output for visual inspection.
processed_data

In [None]:
# Global matplotlib styling; it stays in effect for every figure drawn afterwards.
# Per-element sizes (title, axis labels, ticks, legend) are left at their defaults.
plot_style = {
    'font.family': 'Times New Roman',
    'font.weight': 'bold',
    'axes.labelweight': 'bold',
    'font.size': 20,
    'lines.linewidth': 3.0,
}
plt.rcParams.update(plot_style)

# Overlay the UV 280 trace and the UV 280 cut trace on one axis.
fig, ax1 = plt.subplots(figsize=(12, 8))
ax1.set_xlabel('Volume (mL)')
ax1.set_ylabel('UV 280 (mAU)', color='black')

# (data column, line colour, legend text); the x values come from the matching
# "<column>_Volume" column.
overlay = (
    ('UV_280', 'blue', 'UV 280 (mAU)'),
    ('UV_280_CUT', 'yellow', 'UV 280 Cut (mAU)'),
)
for column, line_color, legend_text in overlay:
    ax1.plot(processed_data[f'{column}_Volume'], processed_data[column],
             color=line_color, label=legend_text)
ax1.tick_params(axis='y', labelcolor='black')

# Legend built from the handles registered on this axis.
handles, legend_labels = ax1.get_legend_handles_labels()
ax1.legend(handles, legend_labels, loc='upper right')

# Horizontal grid lines only.
ax1.grid(axis='y', linestyle='--', alpha=0.7)
ax1.grid(axis='x', visible=False)

# Render the figure (this cell does not write it to disk).
plt.show()

In [None]:
# Cut-correct the UV trace: UV_280 = raw UV_280 - UV_280_CUT, row by row
# (assumes both curves are sampled at the same volumes — TODO confirm).
# The unmodified signal is saved in 'UV_280_raw' the first time this cell runs,
# so re-executing the cell recomputes the same result instead of subtracting
# the cut signal a second time.
if 'UV_280_raw' not in processed_data.columns:
    processed_data['UV_280_raw'] = processed_data['UV_280']
processed_data['UV_280'] = processed_data['UV_280_raw'] - processed_data['UV_280_CUT']

In [None]:
# Show the table again; UV_280 now holds the values left after subtracting UV_280_CUT.
processed_data

In [None]:
# Final figure: UV 280 (left y-axis) and conductivity (right y-axis) against
# volume, with fraction marks along the bottom edge; saved next to the CSV.
fig, ax1 = plt.subplots(figsize=(12, 8))

# UV 280 on the left y-axis.
color = 'blue'
ax1.set_xlabel('Volume (mL)')
ax1.set_ylabel('UV 280 (mAU)', color='black')
ax1.plot(processed_data['UV_280_Volume'], processed_data['UV_280'],
         color=color, label='UV 280 (mAU)')
ax1.tick_params(axis='y', labelcolor='black')

# Conductivity on a twin y-axis sharing the same x-axis.
ax2 = ax1.twinx()
color2 = 'orange'
ax2.set_ylabel('Conductivity (mS/cm)', color='black')
ax2.plot(processed_data['Cond_Volume'], processed_data['Cond'],
         color=color2, label='Conductivity (mS/cm)')
ax2.tick_params(axis='y', labelcolor='black')

# Fraction marks. Rows whose volume could not be parsed (NaN) cannot be placed
# on the x-axis, so they are skipped rather than handed to matplotlib.
marked_fractions = fraction_df.dropna(subset=['Frac_Volume'])
for vol, label in zip(marked_fractions['Frac_Volume'], marked_fractions['Frac']):
    # Short tick just above the bottom edge; ymin/ymax are fractions of the
    # axes height, not data values.
    ax1.axvline(x=vol, color='darkgreen', linestyle='-', alpha=0.8,
                ymin=0.005, ymax=0.01, clip_on=False)

    # Label only a subset of fractions (names ending in '.1' or '.5', plus the
    # waste fraction) so the rotated texts do not overlap.
    label = str(label)  # tolerate non-string labels
    if label.endswith(('.1', '.5')) or label == 'Waste(Frac)':
        ax1.text(vol, 0.1, label,
                 transform=ax1.get_xaxis_transform(),  # x in data units, y in axes fraction
                 rotation=90,
                 verticalalignment='top',
                 horizontalalignment='center',
                 fontsize=10,
                 color='darkgreen',
                 bbox=dict(facecolor='white', alpha=0, pad=1, edgecolor='none', boxstyle='round'))

# Bottom margin for the figure layout.
plt.subplots_adjust(bottom=0.15)

# One legend covering the lines of both y-axes.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper center')

# Horizontal grid lines only.
ax1.grid(axis='y', linestyle='--', alpha=0.7)
ax1.grid(axis='x', visible=False)

# Fixed UV axis range; values above 30 mAU are clipped from view.
ax1.set_ylim(bottom=-1, top=30)

# Save next to the input file with the extension swapped to .jpg. splitext only
# touches the real extension, unlike str.replace('.csv', ''), which would also
# remove '.csv' elsewhere in the name and miss an upper-case '.CSV'.
# Note: 12x8 in at 1200 dpi is a very large image.
output_path = os.path.splitext(file_path)[0] + '.jpg'
plt.savefig(output_path, dpi=1200, bbox_inches='tight')
plt.show()