In [None]:
import pandas as pd
import os
import cv2 #使用cv2进行图像读入和处理
import numpy as np
from scipy.interpolate import make_interp_spline  #导入平滑曲线所需的库 Import the libraries required for smooth curves, you can use others

def calculate_paramater(NO2, NO, N2O, N2, NH4_ion):
    """Return the reaction coefficients normalized to one unit of X.

    Baseline placeholder: the amounts of X, H+, NO3- and H2O are meant to be
    derived from the chemical reaction equation using the five product
    amounts, but here each of them is simply fixed to 1. Every coefficient is
    then multiplied by ``1 / X`` so that the coefficient of X becomes 1.

    Args:
        NO2, NO, N2O, N2, NH4_ion: Amounts of the five products.

    Returns:
        A 10-tuple ``(X, H_ion, NO3_ion, X2_ion, NO2, NO, N2O, N2, NH4_ion,
        H2O)`` of normalized coefficients. ``X`` and ``X2_ion`` are always 1.
    """
    # Placeholder amounts; to be replaced by values derived from the
    # reaction equation.
    X = X2_ion = 1
    H_ion = NO3_ion = H2O = 1

    # Normalization factor: divide every coefficient by the amount of X.
    scale = 1/X

    H_ion, NO3_ion, H2O = H_ion * scale, NO3_ion * scale, H2O * scale
    scaled_products = tuple(
        amount * scale for amount in (NO2, NO, N2O, N2, NH4_ion)
    )

    # After normalization both X and X2_ion are exactly 1.
    X = X2_ion = 1

    return (X, H_ion, NO3_ion, X2_ion, *scaled_products, H2O)


def get_data(image_label, x):
    """Extract the product ratios at concentration ``x`` from ``{image_label}.png``.

    Baseline placeholder: the image is read and cropped, the pixel column
    matching ``x`` is computed and a BGR color range is defined per species,
    but none of these are used yet; every returned ratio is the constant 1.

    Args:
        image_label: Path of the image without the ``.png`` extension.
        x: Concentration, mapped linearly from the x-axis range 2..12 onto
            the width of the cropped image.

    Returns:
        The tuple ``(N2, NH4_ion, N2O, NO, NO2)``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for the image.
    """
    # Read the image.
    image = cv2.imread(f"{image_label}.png")
    if image is None:
        raise ValueError(f"Cannot read the img: {image_label}.png")

    # Fixed crop window (rows 97..710, columns 126..898); presumably the
    # plotting area of the chart -- TODO confirm against the input images.
    plot_area = image[97:711, 126:899]

    # Size of the cropped image.
    plot_height, plot_width = plot_area.shape[:2]

    # Map the x value to a pixel column of the cropped image.
    axis_lo, axis_hi = 2, 12  # x-axis range of the image
    column = int((x - axis_lo) * plot_width / (axis_hi - axis_lo))

    # Clamp the column into the valid range.
    last_column = plot_width - 1
    if column > last_column:
        column = last_column
    if column < 0:
        column = 0

    # (lower, upper) color bounds per species, in BGR order because OpenCV
    # reads images as BGR by default.
    color_ranges = {
        'N2': ([160, 0, 0], [255, 140, 100]),         # blue
        'NH4_ion': ([0, 160, 160], [100, 255, 255]),  # orange
        'N2O': ([0, 160, 0], [150, 255, 150]),        # green
        'NO': ([0, 0, 160], [100, 100, 255]),         # red
        'NO2': ([150, 0, 160], [255, 130, 255]),      # purple
    }

    # Placeholder result: every species ratio is 1.
    species_order = ('N2', 'NH4_ion', 'N2O', 'NO', 'NO2')
    smoothed_results = dict.fromkeys(species_order, 1)
    return tuple(smoothed_results[species] for species in species_order)

In [None]:
#-----Read the training set. The address of the training set has been set; the following section does not need to be modified-------#
datapath_train = "/bohr/train-gvtn/v1/"
input_csv_path_train = os.path.join(datapath_train, 'input_train.csv')
data_train = pd.read_csv(input_csv_path_train)

#---Calculate on the training data----#
output_data_train = []  # One output record (dict) per training row.

# Traverse each image and its x value.
for index, row in data_train.iterrows():
    # Strip only the final extension (get_data appends ".png" itself) and
    # join with the data directory. splitext keeps any other dots in the name.
    image_label = os.path.join(datapath_train, os.path.splitext(row['File Name'])[0])
    x_value = row['c']

    # Process the image and obtain the product ratios.
    results = get_data(image_label, x_value)

    # get_data returns (N2, NH4_ion, N2O, NO, NO2) while calculate_paramater
    # expects (NO2, NO, N2O, N2, NH4_ion): unpack by name so each species
    # lands in the matching parameter instead of being forwarded by position.
    n2_amt, nh4_amt, n2o_amt, no_amt, no2_amt = results
    calculated_values = calculate_paramater(no2_amt, no_amt, n2o_amt, n2_amt, nh4_amt)

    # Add the result to the output data.
    output_data_train.append({
        'File Name': row['File Name'],
        'Scaled mol X': calculated_values[0],
        'p_1': calculated_values[1],
        'p_2': calculated_values[2],
        'Scaled mol X+': calculated_values[3],
        'p_3': calculated_values[4],
        'p_4': calculated_values[5],
        'p_5': calculated_values[6],
        'p_6': calculated_values[7],
        'p_7': calculated_values[8],
        'p_8': calculated_values[9]
    })

# Build the output DataFrame and show it.
output_df_train = pd.DataFrame(output_data_train)
print(output_df_train)

In [None]:
#----Read the testing set. "DATA_PATH" is an environment variable for the encrypted test set. After submission the test set can be accessed this way during system scoring, but contestants cannot download it directly.-----#
env_data_path = os.environ.get('DATA_PATH')

if env_data_path:
    DATA_PATH = env_data_path + "/"

    datapath_test = DATA_PATH
    input_csv_path_test = os.path.join(datapath_test, 'input_test.csv')
    data_test = pd.read_csv(input_csv_path_test)

    #---Calculate on the test data----#
    output_data = []  # One output record (dict) per test row.

    # Traverse each image and its x value.
    for index, row in data_test.iterrows():
        # Strip only the final extension (get_data appends ".png" itself) and
        # join with the data directory. splitext keeps any other dots in the name.
        image_label = os.path.join(datapath_test, os.path.splitext(row['File Name'])[0])
        x_value = row['c']

        # Process the image and obtain the product ratios.
        results = get_data(image_label, x_value)

        # get_data returns (N2, NH4_ion, N2O, NO, NO2) while
        # calculate_paramater expects (NO2, NO, N2O, N2, NH4_ion): unpack by
        # name so each species lands in the matching parameter instead of
        # being forwarded by position.
        n2_amt, nh4_amt, n2o_amt, no_amt, no2_amt = results
        calculated_values = calculate_paramater(no2_amt, no_amt, n2o_amt, n2_amt, nh4_amt)

        # Add the result to the output data.
        output_data.append({
            'File Name': row['File Name'],
            'Scaled mol X': calculated_values[0],
            'p_1': calculated_values[1],
            'p_2': calculated_values[2],
            'Scaled mol X+': calculated_values[3],
            'p_3': calculated_values[4],
            'p_4': calculated_values[5],
            'p_5': calculated_values[6],
            'p_6': calculated_values[7],
            'p_7': calculated_values[8],
            'p_8': calculated_values[9]
        })

    # Build the output DataFrame and save it as the submission CSV file.
    output_df = pd.DataFrame(output_data)
    output_csv_path = 'submission.csv'
    print(output_df)
    output_df.to_csv(output_csv_path, index=False)
else:
    # Without DATA_PATH the test set is not reachable (e.g. when running the
    # baseline locally). Report it and skip inference instead of failing
    # later with a NameError on the undefined DATA_PATH.
    print("Baseline运行时，因为无法读取测试集，所以会有此条报错，属于正常现象")
    print("When baseline is running, this error message will appear because the test set cannot be read, which is a normal phenomenon.")