In [None]:
from math import *
from numpy import *
import matplotlib.pyplot as plt
import pandas as pd
import os

In [None]:
# Working directory of the kernel; input and output paths are built from it.
current_dir = os.getcwd()

# Folder that is scanned for the '.dat' input files.
folder_path = os.path.join(
    current_dir,
    'main1010_sim',
)

In [None]:
# Projectile labels accepted when matching input file names.
projectiles = (
    "p ap n an pi0 pip pim k0 kp km k0l k0s "
    "lambda0 sigma0 sigmap sigmam xi0"
).split()

# Category order used for the 'proj_id' column.
# NOTE(review): presumably listed in the same order as `projectiles` — confirm.
proj_ids = (
    "2212 -2212 2112 -2112 111 211 -211 311 321 -321 130 310 "
    "3122 3212 3222 3112 3322"
).split()

# Category order used for the 'targ_id' column.
targ_ids = "2212 1000060120 1000070140 1000080160 1000180400".split()

In [None]:
# Collect one DataFrame per matching '.dat' file found in `folder_path`.
dfs = []
files_with_nan_proj_id = []  # not filled in this cell; kept in case other cells use it

for file_name in os.listdir(folder_path):
    if not file_name.endswith('.dat'):
        continue

    # The projectile label is the second '_'-separated token of the file name.
    # Names without an underscore have no such token; skip them instead of
    # failing with an IndexError.
    name_parts = file_name.split('_')
    if len(name_parts) < 2:
        continue

    particle = name_parts[1]
    if particle not in projectiles:
        continue

    file_path = os.path.join(folder_path, file_name)

    # Files are tab-separated with no header row. After transposing, the first
    # row holds the column names, so promote it to the header and drop it from
    # the data.
    df = pd.read_csv(file_path, delimiter='\t', header=None)
    df = df.transpose()
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])

    # Strip the first four characters of each 'elab' entry and parse the rest
    # as an integer.
    df['elab'] = df['elab'].str[4:].astype(int)
    dfs.append(df)

In [None]:
# Stack all per-file frames into a single table with a fresh 0..n-1 index.
merged_df = pd.concat(dfs, ignore_index=True)

# Turn the two id columns into ordered categoricals so that sorting follows the
# order of `proj_ids` / `targ_ids` rather than string order. Values that are not
# among the listed categories become NaN.
for id_column, id_order in (('proj_id', proj_ids), ('targ_id', targ_ids)):
    merged_df[id_column] = pd.Categorical(
        merged_df[id_column], categories=id_order, ordered=True
    )

# Sort ascending by projectile, then target, then 'elab'.
merged_df.sort_values(
    by=['proj_id', 'targ_id', 'elab'],
    ascending=True,
    inplace=True,
)

In [None]:
def format_scientific(x):
    """Render large Python integers in zero-decimal scientific notation.

    Args:
        x: Any value.

    Returns:
        A string such as ``'1e+06'`` when ``x`` is an ``int`` that is at least
        1e6; otherwise ``x`` itself, unchanged.
    """
    is_large_int = isinstance(x, int) and x >= 1e6
    return format(x, '.0e') if is_large_int else x

# Apply format_scientific element-wise to every column from the fourth onward
# and write the result back into the same positions.
trailing_columns = merged_df.iloc[:, 3:]
merged_df.iloc[:, 3:] = trailing_columns.applymap(format_scientific)

In [None]:
def _sci6(value):
    """Format an int/float with six decimals in scientific notation; pass anything else through."""
    if isinstance(value, (int, float)):
        return '{:.6e}'.format(value)
    return value


# Step 1: parse every column from the fourth onward as numbers; entries that
# cannot be parsed become NaN.
merged_df.iloc[:, 3:] = merged_df.iloc[:, 3:].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

# Step 2: re-read the stored values and turn them into fixed-width scientific strings.
merged_df.iloc[:, 3:] = merged_df.iloc[:, 3:].applymap(_sci6)

In [None]:
# Show the merged table under a title line.
print("Merged DataFrame:", merged_df, sep='\n')

In [None]:
# Write one TSV file per (proj_id, targ_id) group into ./split_data.
output_dir = os.path.join(current_dir, 'split_data')
os.makedirs(output_dir, exist_ok=True)

grouped = merged_df.groupby(['proj_id', 'targ_id'])

for group_name, grouped_df in grouped:
    file_name = f"xsec_{group_name[0]}_{group_name[1]}.tsv"
    file_path = os.path.join(output_dir, file_name)

    # Two '#'-prefixed lines precede the data: the column names, then a units
    # row labelling the first column 'GeV', leaving the next two blank and
    # marking every remaining column 'mb'.
    # NOTE(review): the units row assumes the column order is elab, proj_id,
    # targ_id, then the value columns — confirm against the input files.
    header = '#' + '\t'.join(grouped_df.columns)
    units = '#' + '\t'.join(['GeV'] + [''] + [''] + ['mb'] * (grouped_df.shape[1] - 3))

    # Work on a private copy: the group frame is derived from merged_df, and
    # assigning a column to it directly triggers SettingWithCopyWarning.
    grouped_df = grouped_df.copy()

    # 'elab' holds a power-of-ten exponent; store 10**elab in scientific notation.
    # `10.0 ** x` is used instead of `pow`, because the notebook's two wildcard
    # imports make it unclear which `pow` is in scope, and a float base always
    # gives a float result, including for negative exponents.
    grouped_df['elab'] = grouped_df['elab'].apply(lambda x: '{:.6e}'.format(10.0 ** x))

    # Write the two comment lines and the data rows in a single pass.
    # newline='' stops Python translating line endings, so the hand-written
    # lines use os.linesep to match what to_csv emits by default.
    with open(file_path, 'w', newline='', encoding='utf-8') as f:
        f.write(header + os.linesep + units + os.linesep)
        grouped_df.to_csv(f, sep='\t', index=False, header=False)

    print(f"Saved DataFrame for proj_id: {group_name[0]}, targ_id: {group_name[1]} to:", file_path)

In [None]:
# Save the full merged table, with its header row, as one tab-separated file
# in the working directory.
output_file_path = os.path.join(current_dir, 'xsec.tsv')
merged_df.to_csv(
    path_or_buf=output_file_path,
    sep='\t',
    index=False,
)

print("Merged DataFrame saved to:", output_file_path)