In [49]:
import tarfile
import os
import pandas as pd


In [50]:

# path to the tar.gz file
tar_path = 'pythia8311_xsec.tar.gz'
extract_path = './extracted_files'

# extract the tar.gz file
with tarfile.open(tar_path, 'r:gz') as tar:
    tar.extractall(path=extract_path)

# path to /xsec_dat in extracted files directory
xsec_dat_path = os.path.join(extract_path, 'xsec_dat')


In [51]:

# get a list of all filenames from /xsec_dat
file_list = [os.path.join(xsec_dat_path, f) for f in os.listdir(xsec_dat_path) if os.path.isfile(os.path.join(xsec_dat_path, f))]
#print("Files to be processed:", file_list)


Files to be processed: ['./extracted_files/xsec_dat/main1010_p_16O_1e6_100000.dat', './extracted_files/xsec_dat/main1010_xi0_12C_1e12_100000.dat', './extracted_files/xsec_dat/main1010_k0_12C_1e9_100000.dat', './extracted_files/xsec_dat/main1010_kp_40Ar_1e6_100000.dat', './extracted_files/xsec_dat/main1010_k0s_40Ar_1e4_100000.dat', './extracted_files/xsec_dat/main1010_k0_16O_1e4_100000.dat', './extracted_files/xsec_dat/main1010_xi0_16O_1e4_100000.dat', './extracted_files/xsec_dat/main1010_xi0_14N_1e4_100000.dat', './extracted_files/xsec_dat/main1010_k0_16O_1e6_100000.dat', './extracted_files/xsec_dat/main1010_lambda0_40Ar_1e8_100000.dat', './extracted_files/xsec_dat/main1010_pip_12C_1e7_100000.dat', './extracted_files/xsec_dat/main1010_n_16O_1e7_100000.dat', './extracted_files/xsec_dat/main1010_pim_12C_1e5_100000.dat', './extracted_files/xsec_dat/main1010_k0s_12C_1e5_100000.dat', './extracted_files/xsec_dat/main1010_xi0_14N_1e12_100000.dat', './extracted_files/xsec_dat/main1010_lambda0_

In [52]:

# read each file into a pandas DataFrame
dataframes = {}
for file in file_list:
    try:
        # no header in the files
        df = pd.read_csv(file, delimiter='\t', header=None)
        # store df with the file name
        dataframes[file] = df
        print(f"Read file {file} with structure {df.shape}")
    except pd.errors.EmptyDataError:
        print(f"Skipping file with no columns to parse: {file}") # 
    except Exception as e:
        print(f"Error reading {file}: {e}")


Read file ./extracted_files/xsec_dat/main1010_p_16O_1e6_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_xi0_12C_1e12_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_k0_12C_1e9_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_kp_40Ar_1e6_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_k0s_40Ar_1e4_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_k0_16O_1e4_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_xi0_16O_1e4_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_xi0_14N_1e4_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_k0_16O_1e6_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_lambda0_40Ar_1e8_100000.dat with structure (17, 2)
Read file ./extracted_files/xsec_dat/main1010_pip_12C_1e7_100000.dat with stru

In [53]:

# path to new files
output_path = './output_files'
os.makedirs(output_path, exist_ok=True)

# write each df to a new file: same name but with space as column separator
for file, df in dataframes.items():
    output_file = os.path.join(output_path, os.path.basename(file))
    df.to_csv(output_file, sep=' ', index=False, header=False)
    print(f"Saved file {output_file} with shape {df.shape}")


Saved file ./output_files/main1010_p_16O_1e6_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_xi0_12C_1e12_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_k0_12C_1e9_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_kp_40Ar_1e6_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_k0s_40Ar_1e4_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_k0_16O_1e4_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_xi0_16O_1e4_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_xi0_14N_1e4_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_k0_16O_1e6_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_lambda0_40Ar_1e8_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_pip_12C_1e7_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_n_16O_1e7_100000.dat with shape (17, 2)
Saved file ./output_files/main1010_pim_12C_1e5_100000.dat with shape (17, 2)