In [1]:
using Glob
using YAML
using Plots
using LinearAlgebra
using CSV
using PyCall; using DataFrames
using Tables
using Dates
using Base.Threads: @threads

pandas = pyimport("pandas")

PyObject <module 'pandas' from '/Users/carlitos/miniconda3/envs/franny/lib/python3.12/site-packages/pandas/__init__.py'>

In [2]:
function read_yaml(file_path)
    try
        open(file_path) do file
            return YAML.load(file)
        end
    catch e
        println("Error reading YAML file $file_path: ", e)
        return nothing
    end
end


function read_parquet(file_path::String)::Union{DataFrame, Nothing}
    try
        pd_df = pandas.read_parquet(file_path)
        columns = Dict(Symbol(col) => collect(pd_df[col].tolist()) for col in pd_df.columns)
        return DataFrame(columns)
    catch e
        println("Error reading Parquet file $file_path: ", e)
        return nothing
    end
end


function extract_zenith_azimuth(args)
    zenith_azimuth = match(r"--zenith\s+(\S+)\s+--azimuth\s+(\S+)", args)
    if zenith_azimuth !== nothing
        zenith = parse(Float64, zenith_azimuth.captures[1])
        azimuth = parse(Float64, zenith_azimuth.captures[2])
        return zenith, azimuth
    else
        return nothing, nothing
    end
end

function extract_plane_data(file_path::String)
    config_data = read_yaml(file_path)
    x_axis = config_data["x-axis"]
    y_axis = config_data["y-axis"]
    return x_axis, y_axis
end

function extract_normal_vector(file_path::String)
    config_data = read_yaml(file_path)
    if config_data === nothing
        return nothing
    end
    args = config_data["args"]
    xdir_match = match(r"--xdir\s+(\S+)", args)
    ydir_match = match(r"--ydir\s+(\S+)", args)
    zdir_match = match(r"--zdir\s+(\S+)", args)
    if xdir_match !== nothing && ydir_match !== nothing && zdir_match !== nothing
        xdir = parse(Float64, xdir_match.captures[1])
        ydir = parse(Float64, ydir_match.captures[1])
        zdir = parse(Float64, zdir_match.captures[1])
        return [xdir, ydir, zdir]
    else
        return nothing
    end
end

function process_batch(batch_files::Vector{String})::Vector{DataFrame}
    batch_data_list = Vector{DataFrame}()
    for parquet_file in batch_files
        particle_data = read_parquet(parquet_file)
        if particle_data !== nothing
            push!(batch_data_list, particle_data)
        end
    end
    return batch_data_list
end

process_batch (generic function with 1 method)

In [3]:
directory = "/Users/carlitos/Desktop/TAMBO/plots/air_shower_reconstruction/data"
configs = glob("sim_test*/config.yaml", directory)
obs_configs = glob("sim_test*/particles_obs_final/config.yaml", directory)
parquet_files = glob("sim_test*/particles_obs_final/particles.parquet", directory)

1091-element Vector{String}:
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 59 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 58 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 58 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 58 bytes ⋯ [22m[39m"les_obs_final/particles.parquet"
 "/Users/carlitos/Desktop/TAMBO/p"[93m[1m ⋯ 58 bytes ⋯ [22m

In [4]:
zeniths = Float64[]
azimuths = Float64[]

for config in configs
    config_data = read_yaml(config)
    args = config_data["args"]
    zenith, azimuth = extract_zenith_azimuth(args)
    new_zenith = π - zenith
    push!(zeniths, rad2deg(new_zenith))
    push!(azimuths, rad2deg(azimuth))
end

In [10]:
batch_size = 100  # Define batch size
particle_data_list = DataFrame[]

for i in 1:batch_size:length(parquet_files)
    println("Processing batch starting with file $i...")
    batch_files = parquet_files[i:min(i + batch_size - 1, length(parquet_files))]
    println("Batch files: ", batch_files)
    batch_data_list = process_batch(batch_files)
    if !isempty(batch_data_list)
        append!(particle_data_list, batch_data_list)
    end
    println("Finished processing batch starting with file $i.")
end

println("Processing complete.")

In [6]:
if !isempty(particle_data_list)
    println("First DataFrame:")
    println(particle_data_list[1])
else
    println("No Parquet files were successfully read.")
end

First DataFrame:
[1m7993×11 DataFrame[0m
[1m  Row [0m│[1m kinetic_energy [0m[1m nx           [0m[1m ny         [0m[1m nz          [0m[1m pdg        [0m[1m shower [0m[1m time       [0m[1m weight  [0m[1m x              [0m[1m y            [0m[1m z       [0m
      │[90m Float64        [0m[90m Float64      [0m[90m Float64    [0m[90m Float64     [0m[90m Int64      [0m[90m Int64  [0m[90m Float64    [0m[90m Float64 [0m[90m Float64        [0m[90m Float64      [0m[90m Float64 [0m
──────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    1 │     0.854988     0.0737989    -0.962081   -0.262592           -13       0  2.22328e-5      1.0   -513.671        -1724.41         0.0
    2 │     0.00554593   0.154068     -0.694111   -0.703187            22       0  1.24967e-5      1.0   -293.315         1618.18         0.0
    3 │     8.59289      0.195121     -0.962824  

Excessive output truncated after 524346 bytes.

In [7]:
obs_x_axes = []
obs_y_axes = []

for obs_config in obs_configs
    config_data = read_yaml(obs_config)
    if config_data !== nothing
        x_axis, y_axis = extract_plane_data(obs_config)
        push!(obs_x_axes, x_axis)
        push!(obs_y_axes, y_axis)
    else
        println("Skipping corrupted config file: $obs_config")
    end
end

println("Collected x-axes: ", obs_x_axes)
println("Collected y-axes: ", obs_y_axes)

In [8]:
normal_vectors = []

for config in configs
    println("Processing file: $config")
    normal_vector = extract_normal_vector(config)
    if normal_vector !== nothing
        println("Extracted normal vector: $normal_vector")
        push!(normal_vectors, normal_vector)
    else
        println("Skipping corrupted config file: $config")
    end
end

println("Collected normal vectors: ", normal_vectors)
