In [None]:
import numpy as np
import os 
import pandas as pd


In [None]:
# Entries of the train directory are expected to be named with integer planet ids
# (int() raises ValueError on anything else); `planets` holds them in ascending order.
files = sorted(os.listdir("/kaggle/input/ariel-data-challenge-2024/train"))
planets = sorted(int(entry) for entry in files)


In [None]:
def process_signal(signal, gain, offset, dead, flat, dark, linear_corr):
    """Calibrate raw detector frames and reduce them to CDS differences.

    Steps, in order:
      1. ADC conversion: ``signal * gain + offset``.
      2. Mask dead pixels in the signal and in the flat and dark frames.
      3. Normalise every frame as ``(signal - dark) / (flat - dark)``.
      4. Correlated double sampling: subtract each even-indexed frame from
         the odd-indexed frame that follows it.

    Parameters
    ----------
    signal : ndarray, shape (n_frames, rows, cols)
        Raw frames. ``n_frames`` must be even so the frames pair up for CDS.
    gain, offset : scalar
        ADC gain and offset applied to ``signal``.
    dead : array_like, shape (rows, cols)
        Truthy where a pixel has to be masked.
    flat, dark : array_like, shape (rows, cols)
        Flat and dark calibration frames.
    linear_corr : any
        Accepted for interface compatibility only; the linearity correction
        is currently disabled, so this argument is not read.

    Returns
    -------
    numpy.ma.MaskedArray, shape (n_frames // 2, rows, cols)
        One CDS frame per (even, odd) input pair, masked at dead pixels and
        wherever the normalisation produced a non-finite value.

    Raises
    ------
    ValueError
        If ``n_frames`` is odd.
    """
    # ADC conversion
    # NOTE(review): gain is applied as a multiplier here - confirm against the
    # dataset's description of the ADC encoding.
    signal = signal * gain + offset

    # With an odd frame count the two CDS slices below differ in length; for
    # 1 or 3 frames numpy would silently broadcast them into a wrong result
    # instead of failing, so reject the input explicitly.
    n_frames = signal.shape[0]
    if n_frames % 2:
        raise ValueError(
            f"process_signal needs an even number of frames to form CDS pairs, got {n_frames}"
        )

    # Clean flat, dark, dead
    # The 2-D calibration frames broadcast against the 3-D signal, which gives
    # the same data and mask as tiling them to full size but without holding
    # extra (n_frames, rows, cols) copies in memory. masked_where() requires a
    # condition of the same shape as the array, hence the broadcast view.
    flat = np.ma.masked_where(dead, flat)
    dark = np.ma.masked_where(dead, dark)
    signal = np.ma.masked_where(np.broadcast_to(dead, signal.shape), signal)

    signal = (signal - dark) / (flat - dark)

    # NOTE: the per-pixel linearity correction (evaluating the polynomial in
    # `linear_corr`, coefficients flipped along axis 0, on every pixel's time
    # series) is intentionally left disabled.

    # Get CDS
    return signal[1::2, :, :] - signal[::2, :, :]

def bin_obs(cds_signal, binning):
    """Sum consecutive groups of ``binning`` frames along the first axis.

    ``cds_signal`` has shape (n_frames, rows, cols). Frames left over after
    the last complete group are ignored. The result is a plain float ndarray
    of shape (n_frames // binning, rows, cols); when the input is a masked
    array, masked samples do not contribute to the sums.
    """
    n_frames, n_rows, n_cols = cds_signal.shape
    n_bins = n_frames // binning
    binned = np.zeros((n_bins, n_rows, n_cols))
    for bin_idx, start in enumerate(range(0, n_bins * binning, binning)):
        # Collapse one window of `binning` frames into a single frame.
        binned[bin_idx] = np.sum(cds_signal[start:start + binning], axis=0)
    return binned

In [None]:
# ADC gain/offset table; the row for each planet is selected by planet_id below.
adc_info = pd.read_csv('/kaggle/input/ariel-data-challenge-2024/train_adc_info.csv')
count = 0  # planets saved so far

for planet in planets:

    print(f"\nProcessing planet: {planet}")
    planet = int(planet)

    # ADC parameters for this planet (first matching row of the table).
    selected_adc_info = adc_info[adc_info['planet_id'] == planet]
    fgs1_gain = selected_adc_info['FGS1_adc_gain'].values[0]
    fgs1_offset = selected_adc_info['FGS1_adc_offset'].values[0]

    # FGS1 calibration frames, each reshaped to the 32x32 detector grid
    # (linear_corr holds 6 such planes).
    fgs_dark = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/train/{planet}/FGS1_calibration/dark.parquet').values.reshape(32, 32)
    fgs_dead = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/train/{planet}/FGS1_calibration/dead.parquet').values.reshape(32, 32)
    fgs_flat = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/train/{planet}/FGS1_calibration/flat.parquet').values.reshape(32, 32)
    fgs_linear_corr = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/train/{planet}/FGS1_calibration/linear_corr.parquet').values.reshape(6, 32, 32)

    # Raw FGS1 signal, viewed as 135000 frames of 32x32 pixels.
    fgs1 = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/train/{planet}/FGS1_signal.parquet').values
    fgs1 = fgs1.reshape(135000, 32, 32)

    # Calibrate, then sum every 60 consecutive CDS frames.
    fgs1_processed = process_signal(
        signal=fgs1,
        gain=fgs1_gain,
        offset=fgs1_offset,
        dead=fgs_dead,
        flat=fgs_flat,
        dark=fgs_dark,
        linear_corr=fgs_linear_corr,
    )
    fgs1_binned = bin_obs(fgs1_processed, binning=60)

    # The full binned image cube is saved. Disabled alternative: collapse each
    # binned frame to one value with np.sum(fgs1_binned, axis=(1, 2)).
    arr = fgs1_binned
    np.save(f"/kaggle/working/{planet}_fgs.npy", arr)

    count += 1
    print(f"saved planet number {planet} ", f"completed planets {count}", arr.shape)


In [None]:
# Final summary: `count` is the number of planets the processing loop saved.
print(f"{count} planets saved , cleaning done")