This notebook is an update of https://www.kaggle.com/code/sergeifironov/ariel-only-correlation
from Sergei Fironov

Updates :
- Keep only pixels 10:22 out of the 32 (the images are well centred)
- Use the derivative for the determination of the beginning and end of the signal during eclipse (idea from Reza R. Choubeh)
- 'Simplification' of the code for minimize
- Degree of polyfit <= 4
- Test predictions are produced by a Ridge regression that was trained on the modelization results (the target predictions obtained from the modelization) against the true targets.

# Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import joblib

from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error
import itertools

from scipy.optimize import minimize
from scipy import optimize

from astropy.stats import sigma_clip

In [2]:
# Name of the data split; it is interpolated into every input path below.
dataset = 'test'

# Per-planet ADC table, indexed by planet_id (gain/offset columns are read in preproc).
adc_info = pd.read_csv(
    '/kaggle/input/ariel-data-challenge-2024/'+f'{dataset}_adc_info.csv',
    index_col='planet_id',
)

# Axis metadata shipped with the competition data (not used by the cells shown here).
axis_info = pd.read_parquet(
    '/kaggle/input/ariel-data-challenge-2024/axis_info.parquet'
)

# Calibration

In [3]:
def apply_linear_corr(linear_corr,clean_signal):
    """Apply a per-pixel polynomial correction to a stack of frames, in place.

    ``linear_corr[:, x, y]`` holds the polynomial coefficients of pixel
    ``(x, y)`` ordered from the constant term upwards. Every value along
    axis 0 of ``clean_signal[:, x, y]`` is replaced by that polynomial
    evaluated at the value.

    Returns the same ``clean_signal`` array object that was passed in.
    """
    # np.poly1d wants the highest-order coefficient first, hence the reversal.
    coeffs_desc = np.flip(linear_corr, axis=0)
    n_rows, n_cols = clean_signal.shape[1], clean_signal.shape[2]

    for row, col in np.ndindex(n_rows, n_cols):
        pixel_poly = np.poly1d(coeffs_desc[:, row, col])
        clean_signal[:, row, col] = pixel_poly(clean_signal[:, row, col])

    return clean_signal

def clean_dark(signal, dark, dt):
    """Subtract the dark frame, scaled per frame by ``dt``, from ``signal`` in place.

    ``dark`` is repeated once per frame of ``signal`` (axis 0), each copy is
    multiplied by the matching entry of ``dt``, and the product is subtracted
    from ``signal``. The modified ``signal`` array itself is returned.
    """
    n_frames = signal.shape[0]
    dark_stack = np.tile(dark, (n_frames, 1, 1))
    # Broadcast dt over the two spatial axes so each frame gets its own scale.
    scaled_dark = dark_stack * dt[:, np.newaxis, np.newaxis]
    signal -= scaled_dark
    return signal

def preproc(dataset, adc_info, sensor, binning = 15):
    """Calibrate one sensor's raw frames for every planet and return binned light curves.

    For each planet id in ``adc_info.index`` the function reads the raw signal
    and the dark / dead / flat / linear_corr calibration parquet files, then
    applies, in order: ADC gain/offset conversion, clipping at zero, per-pixel
    polynomial correction, dark subtraction, flat division (dead and hot
    pixels set to NaN), spatial cropping, NaN-aware averaging over axis 1,
    differencing of consecutive frame pairs (odd-indexed minus even-indexed
    frame), and averaging over ``binning`` consecutive differences.

    Parameters
    ----------
    dataset : str
        Sub-directory of the competition input folder used to build file paths.
    adc_info : pandas.DataFrame
        Indexed by planet id; must provide the ``f'{sensor}_adc_gain'`` and
        ``f'{sensor}_adc_offset'`` columns.
    sensor : str
        ``"AIRS-CH0"`` or ``"FGS1"`` (the only keys of the dictionaries below).
    binning : int, default 15
        Number of frame-pair differences averaged into one output time step.

    Returns
    -------
    numpy.ndarray
        ``np.stack`` of the per-planet results: shape
        ``(n_planets, 135000 // binning // 2, 1)`` for FGS1 and
        ``(n_planets, 11250 // binning // 2, 282)`` for AIRS-CH0.
    """
    # Column window kept for AIRS-CH0 (applied as [cut_inf:cut_sup] below).
    cut_inf, cut_sup = 39, 321
    # Per sensor: [raw signal shape (frames, rows, cols), shape used to broadcast the flat frame].
    sensor_sizes_dict = {"AIRS-CH0":[[11250, 32, 356], [1, 32, cut_sup-cut_inf]], "FGS1":[[135000, 32, 32], [1, 32, 32]]}
    # Shape of the per-planet output buffer; 282 == cut_sup - cut_inf.
    binned_dict = {"AIRS-CH0":[11250 // binning // 2, 282], "FGS1":[135000 // binning // 2]}
    # Shape of the linear-correction table: (coefficients, rows, cols).
    linear_corr_dict = {"AIRS-CH0":(6, 32, 356), "FGS1":(6, 32, 32)}
    planet_ids = adc_info.index
    
    feats = []
    for i, planet_id in tqdm(list(enumerate(planet_ids))):
        # Load the raw signal and the four calibration products for this planet.
        signal = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/{dataset}/{planet_id}/{sensor}_signal.parquet').to_numpy()
        dark_frame = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/{dataset}/' + str(planet_id) + '/' + sensor + '_calibration/dark.parquet', engine='pyarrow').to_numpy()
        dead_frame = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/{dataset}/' + str(planet_id) + '/' + sensor + '_calibration/dead.parquet', engine='pyarrow').to_numpy()
        flat_frame = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/{dataset}/' + str(planet_id) + '/' + sensor + '_calibration/flat.parquet', engine='pyarrow').to_numpy()
        linear_corr = pd.read_parquet(f'/kaggle/input/ariel-data-challenge-2024/{dataset}/' + str(planet_id) + '/' + sensor + '_calibration/linear_corr.parquet').values.astype(np.float64).reshape(linear_corr_dict[sensor])

        # Restore the (frames, rows, cols) layout, then undo the ADC encoding.
        signal = signal.reshape(sensor_sizes_dict[sensor][0]) 
        # Positional lookup: row i of adc_info corresponds to planet_ids[i].
        gain = adc_info[f'{sensor}_adc_gain'].values[i]
        offset = adc_info[f'{sensor}_adc_offset'].values[i]
        signal = signal / gain + offset
        
        # Pixels of the dark frame rejected by sigma clipping are treated as hot.
        hot = sigma_clip(
            dark_frame, sigma=5, maxiters=5
        ).mask
        
        if sensor != "FGS1":
            # AIRS-CH0: keep only columns cut_inf:cut_sup of the signal and of every calibration frame.
            signal = signal[:, :, cut_inf:cut_sup] 
            # Per-frame scale for the dark subtraction: 0.1 on even-indexed frames, 4.6 on odd-indexed frames.
            dt = np.ones(len(signal))*0.1 
            dt[1::2] += 4.5 #@bilzard idea
            linear_corr = linear_corr[:, :, cut_inf:cut_sup]
            dark_frame = dark_frame[:, cut_inf:cut_sup]
            dead_frame = dead_frame[:, cut_inf:cut_sup]
            flat_frame = flat_frame[:, cut_inf:cut_sup]
            hot = hot[:, cut_inf:cut_sup]
        else:
            # FGS1: 0.1 on even-indexed frames, 0.2 on odd-indexed frames.
            dt = np.ones(len(signal))*0.1
            dt[1::2] += 0.1
            
        signal = signal.clip(0) #@graySnow idea
        # Both helpers write into the array they receive and return it.
        linear_corr_signal = apply_linear_corr(linear_corr, signal)
        signal = clean_dark(linear_corr_signal, dark_frame, dt)
        
        # Give the flat a leading length-1 axis so it broadcasts over frames.
        flat = flat_frame.reshape(sensor_sizes_dict[sensor][1])
        # NaN-out dead and hot pixels so np.nanmean below ignores them.
        # NOTE(review): dead_frame is used as a boolean mask here — confirm dead.parquet is boolean.
        flat[dead_frame.reshape(sensor_sizes_dict[sensor][1])] = np.nan
        flat[hot.reshape(sensor_sizes_dict[sensor][1])] = np.nan
        signal = signal / flat
        
        
        if sensor == "FGS1":
            # Keep the central 12x12 pixels and flatten them to 144 values per frame.
            signal = signal[:,10:22,10:22] # **** updates ****
            signal = signal.reshape(sensor_sizes_dict[sensor][0][0],144) # # **** updates ****

        if sensor != "FGS1":
            # Keep rows 10:22 of every column.
            signal = signal[:,10:22,:] # **** updates ****

        # Average over axis 1 (the 144 pixels for FGS1, the 12 kept rows for AIRS-CH0), skipping NaNs.
        mean_signal = np.nanmean(signal, axis=1) 
        # Difference of each odd-indexed frame and the even-indexed frame before it.
        cds_signal = (mean_signal[1::2] - mean_signal[0::2])
        
        # Average every `binning` consecutive differences; leftover frames at the end are dropped.
        binned = np.zeros((binned_dict[sensor]))
        for j in range(cds_signal.shape[0] // binning):
            binned[j] = cds_signal[j*binning:j*binning+binning].mean(axis=0) 
                   
        if sensor == "FGS1":
            # Add a trailing axis so FGS1 can be concatenated with AIRS-CH0 along axis 2.
            binned = binned.reshape((binned.shape[0],1))
        
        feats.append(binned)
        
    return np.stack(feats)
    
# FGS1 has 12x more frames than AIRS-CH0 (135000 vs 11250), so it is binned
# 12x more coarsely to give both sensors the same number of time steps.
fgs1_binned = preproc(f'{dataset}', adc_info, "FGS1", 30*12)
airs_binned = preproc(f'{dataset}', adc_info, "AIRS-CH0", 30)

# Column 0 is the FGS1 light curve; the remaining columns come from AIRS-CH0.
pre_train = np.concatenate([fgs1_binned, airs_binned], axis=2)

100%|██████████| 1/1 [00:06<00:00,  6.82s/it]
100%|██████████| 1/1 [00:06<00:00,  6.12s/it]


# Modelization

In [4]:
def phase_detector(signal, search_start=30, search_stop=140):
    """Locate the steepest fall before, and steepest rise after, the signal minimum.

    The minimum of ``signal[search_start:search_stop]`` splits the signal in
    two. ``phase1`` is the index of the most negative first derivative in the
    part before the minimum; ``phase2`` is the index (in full-signal
    coordinates) of the most positive first derivative from the minimum on.

    Parameters
    ----------
    signal : numpy.ndarray
        1-D light curve.
    search_start, search_stop : int, default 30 and 140
        Window in which the minimum is searched. The defaults reproduce the
        previously hard-coded window.

    Returns
    -------
    (phase1, phase2) : tuple of int

    Raises
    ------
    ValueError
        If the search window is empty, or if either side of the minimum has
        fewer than two samples (``np.gradient`` needs at least two).
    """
    MIN = np.argmin(signal[search_start:search_stop]) + search_start

    # The derivatives are deliberately NOT divided by their own maximum.
    # Dividing by a positive constant never changes argmin/argmax, but when
    # the maximum is negative (e.g. a strictly decreasing segment) it flips
    # the sign and selects the shallowest slope; a zero maximum gives NaN.
    slope_before = np.gradient(signal[:MIN])
    slope_after = np.gradient(signal[MIN:])

    phase1 = np.argmin(slope_before)
    phase2 = np.argmax(slope_after) + MIN

    return phase1, phase2
    
def objective(s):
    """Mean absolute residual of a cubic fit after rescaling the in-eclipse samples by ``1 + s``.

    Reads the module-level ``signal``, ``p1`` and ``p2`` (set by the fitting
    loop before each ``minimize`` call). Samples within ``delta`` of ``p1``
    and of ``p2`` are dropped, the samples between them are multiplied by
    ``1 + s``, and a degree-3 polynomial is fitted to what remains against a
    plain 0..N-1 index.

    NOTE: the previous version looped over degrees 0-3, but its ``best_q``
    comparison sat outside the loop and the function returned the last ``q``,
    so only the degree-3 error was ever returned. This version computes just
    that fit, so returned values are unchanged and three discarded polyfits
    per call are avoided. Values are kept as-is on purpose: the pretrained
    Ridge model applied later presumably expects inputs produced this way.

    Parameters
    ----------
    s : float or numpy.ndarray of shape (1,)
        Relative depth correction being optimised.

    Returns
    -------
    float
        Mean absolute deviation between the fitted polynomial and the data.
    """
    delta = 2
    fit_degree = 3

    # 2*delta samples are removed around each of p1 and p2.
    x = np.arange(signal.shape[0] - delta*4)
    y = np.concatenate([
        signal[:p1-delta],
        signal[p1+delta:p2 - delta] * (1 + s),
        signal[p2+delta:],
    ])

    p = np.poly1d(np.polyfit(x, y, deg=fit_degree))
    return np.abs(p(x) - y).mean()


# Fit one depth value per planet.
# `signal`, `p1` and `p2` must stay module-level names: objective() reads them as globals.
all_s = []
for planet_idx in tqdm(range(len(adc_info))):
    # Average every column except column 0 into a single light curve.
    signal = pre_train[planet_idx, :, 1:].mean(axis=1)
    p1, p2 = phase_detector(signal)

    fit = minimize(objective, [0.0001], method='Nelder-Mead')
    all_s.append(fit.x[0])

# Repeat each planet's value across 283 columns -> shape (n_planets, 283).
all_s = np.tile(np.array(all_s)[:, np.newaxis], (1, 283))

100%|██████████| 1/1 [00:00<00:00, 18.14it/s]


# Predictions with Ridge model

In [5]:
# Pretrained Ridge model that maps the fitted values to the 283 target columns.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load model files from a trusted source.
model_path = "/kaggle/input/adc24-meta-model-ridge/model_ridge_10_22_delta2.joblib"
model = joblib.load(model_path)

pred = model.predict(all_s)
pd.DataFrame(pred)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,273,274,275,276,277,278,279,280,281,282
0,0.002677,0.002707,0.0027,0.002696,0.002697,0.002689,0.002689,0.002698,0.002695,0.002691,...,0.002704,0.002704,0.002703,0.002703,0.002703,0.002704,0.002704,0.002704,0.002702,0.002701


# Submission

In [6]:
# The sample submission is read only for its column names (first column skipped).
ss = pd.read_csv('/kaggle/input/ariel-data-challenge-2024/sample_submission.csv')

# Same constant uncertainty for every planet and every column.
sigma = np.full(all_s.shape, 0.000145)
# Negative predictions are floored at zero.
pred = pred.clip(min=0)

submission = pd.DataFrame(
    np.concatenate([pred, sigma], axis=1),
    columns=ss.columns[1:],
    index=adc_info.index,
)
submission.to_csv('submission.csv')
submission


Unnamed: 0_level_0,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,...,sigma_274,sigma_275,sigma_276,sigma_277,sigma_278,sigma_279,sigma_280,sigma_281,sigma_282,sigma_283
planet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
499191466,0.002677,0.002707,0.0027,0.002696,0.002697,0.002689,0.002689,0.002698,0.002695,0.002691,...,0.000145,0.000145,0.000145,0.000145,0.000145,0.000145,0.000145,0.000145,0.000145,0.000145
