# First Model: CNN

## Import packages

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import itertools
import os
import glob 
import seaborn as sns
import tensorflow as tf
import multiprocessing as mp
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Lambda
from tensorflow.keras.models import Model
from astropy.stats import sigma_clip
from tqdm import tqdm
from multiprocessing import Pool
from concurrent.futures import ThreadPoolExecutor, as_completed

sns.set_theme(style='dark')
palette = sns.color_palette('muted')
pd.set_option('display.max_columns', None)

print('TF version:', tf.__version__)

2025-09-28 03:22:39.725730: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759029759.913813      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759029759.975239      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TF version: 2.18.0


## Load and calibrate the data

In [2]:
path_folder = '/kaggle/input/ariel-data-challenge-2025/' 
path_out = '/kaggle/tmp/data_light_raw/'

if not os.path.exists(path_out):
    os.makedirs(path_out)
    print(f"Directory {path_out} created.")
else:
    print(f"Directory {path_out} already exists.")

CHUNKS_SIZE = 4

Directory /kaggle/tmp/data_light_raw/ created.


In [3]:
def ADC_convert(signal, gain=0.4369, offset=-1000):
    """The Analog-to-Digital Conversion (adc) is performed by the detector to convert
    the pixel voltage into an integer number. Since we are using the same conversion number 
    this year, we have simply hard-coded it inside."""
    signal = signal.astype(np.float64)
    signal /= gain
    signal += offset
    return signal

def mask_hot_dead(signal, dead, dark):
    hot = sigma_clip(
        dark, sigma=5, maxiters=5
    ).mask
    hot = np.tile(hot, (signal.shape[0], 1, 1))
    dead = np.tile(dead, (signal.shape[0], 1, 1))
    signal = np.ma.masked_where(dead, signal)
    signal = np.ma.masked_where(hot, signal)
    return signal

def apply_linear_corr(linear_corr, clean_signal):
    linear_corr = np.flip(linear_corr, axis=0)
    for x, y in itertools.product(
                range(clean_signal.shape[1]), range(clean_signal.shape[2])
            ):
        poli = np.poly1d(linear_corr[:, x, y])
        clean_signal[:, x, y] = poli(clean_signal[:, x, y])
    return clean_signal

def clean_dark(signal, dead, dark, dt):
    dark = np.ma.masked_where(dead, dark)
    dark = np.tile(dark, (signal.shape[0], 1, 1))
    signal -= dark * dt[:, np.newaxis, np.newaxis]
    return signal

def get_cds(signal):
    cds = signal[:,1::2,:,:] - signal[:,::2,:,:]
    return cds

def bin_obs(cds_signal, binning):
    cds_transposed = cds_signal.transpose(0,1,3,2)
    cds_binned = np.zeros((cds_transposed.shape[0], cds_transposed.shape[1] // binning, cds_transposed.shape[2], cds_transposed.shape[3]))
    for i in range(cds_transposed.shape[1] // binning):
        cds_binned[:,i,:,:] = np.sum(cds_transposed[:,i*binning:(i+1)*binning,:,:], axis=1)
    return cds_binned

def correct_flat_field(flat, dead, signal):
    flat = flat.transpose(1, 0)
    dead = dead.transpose(1, 0)
    flat = np.ma.masked_where(dead, flat)
    flat = np.tile(flat, (signal.shape[0], 1, 1))
    signal /= flat
    return signal

def get_index(files, CHUNKS_SIZE):
    index = []
    for file in files:
        file_name = file.split('/')[-1]
        if file_name.split('_')[0] == 'AIRS-CH0' and file_name.split('_')[1] == 'signal' and file_name.split('_')[2] == '0.parquet':
            file_index = os.path.basename(os.path.dirname(file))
            index.append(int(file_index))
    index = np.array(index)
    index = np.sort(index) 
    # credit to DennisSakva
    index = np.array_split(index, len(index) // CHUNKS_SIZE)
    
    return index

In [4]:
files = glob.glob(os.path.join(path_folder + 'train/', '*/*'))

In [5]:
index = get_index(files, CHUNKS_SIZE)
print(len(index[0]))

4


In [6]:
axis_info = pd.read_parquet(os.path.join(path_folder,'axis_info.parquet'))
DO_MASK = False
DO_THE_NL_CORR = False
DO_DARK = False
DO_FLAT = False
TIME_BINNING = True

cut_inf, cut_sup = 39, 321
l = cut_sup - cut_inf

In [7]:
def load_calibration_data_batch(path_folder, index_chunk, cut_inf, cut_sup, dataset):
    """Load all calibration data for the chunk at once"""
    calibration_data = {}
    
    for idx in index_chunk:
        calibration_data[idx] = {}
        
        # AIRS calibration data
        airs_flat = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/AIRS-CH0_calibration_0/flat.parquet'))
        calibration_data[idx]['airs_flat'] = airs_flat.values.astype(np.float32).reshape((32, 356))[:, cut_inf:cut_sup]
        
        airs_dark = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/AIRS-CH0_calibration_0/dark.parquet'))
        calibration_data[idx]['airs_dark'] = airs_dark.values.astype(np.float32).reshape((32, 356))[:, cut_inf:cut_sup]
        
        airs_dead = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/AIRS-CH0_calibration_0/dead.parquet'))
        calibration_data[idx]['airs_dead'] = airs_dead.values.astype(np.float32).reshape((32, 356))[:, cut_inf:cut_sup]
        
        airs_linear = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/AIRS-CH0_calibration_0/linear_corr.parquet'))
        calibration_data[idx]['airs_linear'] = airs_linear.values.astype(np.float32).reshape((6, 32, 356))[:, :, cut_inf:cut_sup]
        
        # FGS1 calibration data
        fgs_flat = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/FGS1_calibration_0/flat.parquet'))
        calibration_data[idx]['fgs_flat'] = fgs_flat.values.astype(np.float32).reshape((32, 32))
        
        fgs_dark = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/FGS1_calibration_0/dark.parquet'))
        calibration_data[idx]['fgs_dark'] = fgs_dark.values.astype(np.float32).reshape((32, 32))
        
        fgs_dead = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/FGS1_calibration_0/dead.parquet'))
        calibration_data[idx]['fgs_dead'] = fgs_dead.values.astype(np.float32).reshape((32, 32))
        
        fgs_linear = pd.read_parquet(os.path.join(path_folder, f'{dataset}/{idx}/FGS1_calibration_0/linear_corr.parquet'))
        calibration_data[idx]['fgs_linear'] = fgs_linear.values.astype(np.float32).reshape((6, 32, 32))
    
    return calibration_data

In [8]:
def process_single_observation(args):
    """Process a single observation with all AIRS and FGS1 cleaning steps"""
    # Unpack the arguments
    (i, index_chunk, path_folder, cut_inf, cut_sup, l, axis_info, calibration_data, DO_MASK, DO_THE_NL_CORR, DO_DARK) = args
    
    idx = index_chunk[i]
    
    # AIRS Processing
    # Load AIRS signal data
    df = pd.read_parquet(os.path.join(path_folder, f'train/{idx}/AIRS-CH0_signal_0.parquet'))
    signal = df.values.astype(np.float32).reshape((df.shape[0], 32, 356))

    # 1. ADC Conversion
    signal = ADC_convert(signal)
    dt_airs = axis_info['AIRS-CH0-integration_time'].dropna().values
    dt_airs[1::2] += 0.1
    chopped_signal = signal[:, :, cut_inf:cut_sup]
    del signal, df
    
    # Get pre-loaded calibration data for AIRS
    flat = calibration_data[idx]['airs_flat']
    dark = calibration_data[idx]['airs_dark'] 
    dead_airs = calibration_data[idx]['airs_dead']
    linear_corr = calibration_data[idx]['airs_linear']

    # 2. Mask Hot/Dead Pixels
    if DO_MASK:
        chopped_signal = mask_hot_dead(chopped_signal, dead_airs, dark)

    # 3. Linearity Correction
    if DO_THE_NL_CORR: 
        linear_corr_signal = apply_linear_corr(linear_corr, chopped_signal)
        chopped_signal = linear_corr_signal

    # 4. Dark Current Subtraction
    if DO_DARK: 
        cleaned_signal = clean_dark(chopped_signal, dead_airs, dark, dt_airs)
        chopped_signal = cleaned_signal

    # Store AIRS result
    airs_result = chopped_signal
    
    # FGS1 Processing
    # Load FGS1 signal data
    df = pd.read_parquet(os.path.join(path_folder, f'train/{idx}/FGS1_signal_0.parquet'))
    fgs_signal = df.values.astype(np.float32).reshape((df.shape[0], 32, 32))

    # 1. ADC Conversion
    fgs_signal = ADC_convert(fgs_signal)
    dt_fgs1 = np.ones(len(fgs_signal)) * 0.1
    dt_fgs1[1::2] += 0.1
    chopped_FGS1 = fgs_signal
    del fgs_signal, df
    
    # Get pre-loaded calibration data for FGS1
    flat = calibration_data[idx]['fgs_flat']
    dark = calibration_data[idx]['fgs_dark']
    dead_fgs1 = calibration_data[idx]['fgs_dead']
    linear_corr = calibration_data[idx]['fgs_linear']

    # 2. Mask Hot/Dead pixels
    if DO_MASK:
        chopped_FGS1 = mask_hot_dead(chopped_FGS1, dead_fgs1, dark)

    # 3. Linearity Correction
    if DO_THE_NL_CORR: 
        linear_corr_signal = apply_linear_corr(linear_corr, chopped_FGS1)
        chopped_FGS1 = linear_corr_signal

    # 4. Dark Current Subtraction
    if DO_DARK: 
        cleaned_signal = clean_dark(chopped_FGS1, dead_fgs1, dark, dt_fgs1)
        chopped_FGS1 = cleaned_signal

    # Store FGS1 result
    fgs_result = chopped_FGS1
    
    # Return the processed results
    return i, airs_result, fgs_result

In [9]:
def process_single_observation_test(args):
    """Process a single observation with all AIRS and FGS1 cleaning steps"""
    # Unpack the arguments
    (i, index_chunk, path_folder, cut_inf, cut_sup, l, axis_info, calibration_data, DO_MASK, DO_THE_NL_CORR, DO_DARK) = args
    
    idx = index_chunk[i]
    
    # AIRS Processing
    # Load AIRS signal data
    df = pd.read_parquet(os.path.join(path_folder, f'test/{idx}/AIRS-CH0_signal_0.parquet'))
    signal = df.values.astype(np.float32).reshape((df.shape[0], 32, 356))

    # 1. ADC Conversion
    signal = ADC_convert(signal)
    dt_airs = axis_info['AIRS-CH0-integration_time'].dropna().values
    dt_airs[1::2] += 0.1
    chopped_signal = signal[:, :, cut_inf:cut_sup]
    del signal, df
    
    # Get pre-loaded calibration data for AIRS
    flat = calibration_data[idx]['airs_flat']
    dark = calibration_data[idx]['airs_dark'] 
    dead_airs = calibration_data[idx]['airs_dead']
    linear_corr = calibration_data[idx]['airs_linear']

    # 2. Mask Hot/Dead Pixels
    if DO_MASK:
        chopped_signal = mask_hot_dead(chopped_signal, dead_airs, dark)

    # 3. Linearity Correction
    if DO_THE_NL_CORR: 
        linear_corr_signal = apply_linear_corr(linear_corr, chopped_signal)
        chopped_signal = linear_corr_signal

    # 4. Dark Current Subtraction
    if DO_DARK: 
        cleaned_signal = clean_dark(chopped_signal, dead_airs, dark, dt_airs)
        chopped_signal = cleaned_signal

    # Store AIRS result
    airs_result = chopped_signal
    
    # FGS1 Processing
    # Load FGS1 signal data
    df = pd.read_parquet(os.path.join(path_folder, f'test/{idx}/FGS1_signal_0.parquet'))
    fgs_signal = df.values.astype(np.float32).reshape((df.shape[0], 32, 32))

    # 1. ADC Conversion
    fgs_signal = ADC_convert(fgs_signal)
    dt_fgs1 = np.ones(len(fgs_signal)) * 0.1
    dt_fgs1[1::2] += 0.1
    chopped_FGS1 = fgs_signal
    del fgs_signal, df
    
    # Get pre-loaded calibration data for FGS1
    flat = calibration_data[idx]['fgs_flat']
    dark = calibration_data[idx]['fgs_dark']
    dead_fgs1 = calibration_data[idx]['fgs_dead']
    linear_corr = calibration_data[idx]['fgs_linear']

    # 2. Mask Hot/Dead pixels
    if DO_MASK:
        chopped_FGS1 = mask_hot_dead(chopped_FGS1, dead_fgs1, dark)

    # 3. Linearity Correction
    if DO_THE_NL_CORR: 
        linear_corr_signal = apply_linear_corr(linear_corr, chopped_FGS1)
        chopped_FGS1 = linear_corr_signal

    # 4. Dark Current Subtraction
    if DO_DARK: 
        cleaned_signal = clean_dark(chopped_FGS1, dead_fgs1, dark, dt_fgs1)
        chopped_FGS1 = cleaned_signal

    # Store FGS1 result
    fgs_result = chopped_FGS1
    
    # Return the processed results
    return i, airs_result, fgs_result

In [10]:
for n, index_chunk in enumerate(tqdm(index)):
    # Load all calibration data once at the beginning 
    calibration_data = load_calibration_data_batch(path_folder, index_chunk, cut_inf, cut_sup, 'train')
    
    # Pre-allocate output arrays
    AIRS_CH0_clean = np.zeros((CHUNKS_SIZE, 11250, 32, l), dtype=np.float32)
    FGS1_clean = np.zeros((CHUNKS_SIZE, 135000, 32, 32), dtype=np.float32)
    
    # Parallel Processing
    # Determine number of workers 
    num_workers = min(2, CHUNKS_SIZE)
    
    # Prepare arguments for each observation
    args_list = []
    for i in range(CHUNKS_SIZE):
        args = (i, index_chunk, path_folder, cut_inf, cut_sup, l, axis_info, calibration_data, DO_MASK, DO_THE_NL_CORR, DO_DARK)
        args_list.append(args)
        
    # Process observations in parallel
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:        
        # Submit all tasks to the thread pool
        future_to_index = {executor.submit(process_single_observation, args): args[0] for args in args_list}
                
        # Collect results as they complete
        for future in tqdm(as_completed(future_to_index), total=CHUNKS_SIZE, desc=f"Processing observations"):
            i, airs_result, fgs_result = future.result()
            results.append((i, airs_result, fgs_result))
        
    # Sort results by observation index (i) to maintain order
    results.sort(key=lambda x: x[0])
    
    # Store results in your existing arrays
    for result in results:
        i, airs_result, fgs_result = result
        AIRS_CH0_clean[i] = airs_result
        FGS1_clean[i] = fgs_result
        
    # 5. Get Correlated Double Sampling
    AIRS_cds = get_cds(AIRS_CH0_clean)
    FGS1_cds = get_cds(FGS1_clean)
    
    del AIRS_CH0_clean, FGS1_clean
    
    # 6. (Optional) Time Binning (to reduce space)
    if TIME_BINNING:
        AIRS_cds_binned = bin_obs(AIRS_cds, binning=30)
        FGS1_cds_binned = bin_obs(FGS1_cds, binning=30*12)
    else:
        AIRS_cds = AIRS_cds.transpose(0,1,3,2)
        AIRS_cds_binned = AIRS_cds
        FGS1_cds = FGS1_cds.transpose(0,1,3,2)
        FGS1_cds_binned = FGS1_cds
    
    del AIRS_cds, FGS1_cds

    # 7. Flat Field Correction - use pre-loaded calibration data
    for i in range(CHUNKS_SIZE):
        if DO_FLAT:
            flat_airs = calibration_data[index_chunk[i]]['airs_flat']  # Fixed: added [i]
            flat_fgs = calibration_data[index_chunk[i]]['fgs_flat']    # Fixed: added [i]
            dead_airs = calibration_data[index_chunk[i]]['airs_dead']  # Fixed: added [i]
            dead_fgs1 = calibration_data[index_chunk[i]]['fgs_dead']   # Fixed: added [i]
            
            corrected_AIRS_cds_binned = correct_flat_field(flat_airs, dead_airs, AIRS_cds_binned[i])
            AIRS_cds_binned[i] = corrected_AIRS_cds_binned
            corrected_FGS1_cds_binned = correct_flat_field(flat_fgs, dead_fgs1, FGS1_cds_binned[i])
            FGS1_cds_binned[i] = corrected_FGS1_cds_binned

    # Save data
    np.save(os.path.join(path_out, 'AIRS_clean_train_{}.npy'.format(n)), AIRS_cds_binned)
    np.save(os.path.join(path_out, 'FGS1_train_{}.npy'.format(n)), FGS1_cds_binned)
    del AIRS_cds_binned, FGS1_cds_binned, calibration_data

  0%|          | 0/275 [00:00<?, ?it/s]
Processing observations:   0%|          | 0/4 [00:00<?, ?it/s][A
Processing observations:  25%|██▌       | 1/4 [00:04<00:14,  4.94s/it][A
Processing observations:  50%|█████     | 2/4 [00:05<00:04,  2.09s/it][A
Processing observations: 100%|██████████| 4/4 [00:09<00:00,  2.49s/it]
  0%|          | 1/275 [00:17<1:19:04, 17.32s/it]
Processing observations:   0%|          | 0/4 [00:00<?, ?it/s][A
Processing observations:  25%|██▌       | 1/4 [00:06<00:19,  6.41s/it][A
Processing observations:  75%|███████▌  | 3/4 [00:11<00:03,  3.50s/it][A
Processing observations: 100%|██████████| 4/4 [00:11<00:00,  2.95s/it]
  1%|          | 2/275 [00:36<1:23:46, 18.41s/it]
Processing observations:   0%|          | 0/4 [00:00<?, ?it/s][A
Processing observations:  25%|██▌       | 1/4 [00:04<00:14,  4.76s/it][A
Processing observations: 100%|██████████| 4/4 [00:09<00:00,  2.38s/it]
  1%|          | 3/275 [00:53<1:20:17, 17.71s/it]
Processing observations:   0%

In [11]:
TRAIN_CHUNKS_SIZE = CHUNKS_SIZE
CHUNKS_SIZE = 1
test_files = glob.glob(os.path.join(path_folder + 'test/', '*/*'))
test_index = get_index(test_files, CHUNKS_SIZE)

for n, index_chunk in enumerate(tqdm(test_index)):
    # Load all calibration data once at the beginning 
    calibration_data = load_calibration_data_batch(path_folder, index_chunk, cut_inf, cut_sup, 'test')
    
    # Pre-allocate output arrays
    AIRS_CH0_clean = np.zeros((CHUNKS_SIZE, 11250, 32, l), dtype=np.float32)
    FGS1_clean = np.zeros((CHUNKS_SIZE, 135000, 32, 32), dtype=np.float32)
    
    # Parallel Processing
    # Determine number of workers (start with 2 to be safe)
    num_workers = min(2, CHUNKS_SIZE)
    
    # Prepare arguments for each observation
    args_list = []
    for i in range(CHUNKS_SIZE):
        args = (i, index_chunk, path_folder, cut_inf, cut_sup, l, axis_info, calibration_data, DO_MASK, DO_THE_NL_CORR, DO_DARK)
        args_list.append(args)
        
    # Process observations in parallel
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:        
        # Submit all tasks to the thread pool
        future_to_index = {executor.submit(process_single_observation_test, args): args[0] for args in args_list}
                
        # Collect results as they complete
        for future in tqdm(as_completed(future_to_index), total=CHUNKS_SIZE, desc=f"Processing observations"):
            i, airs_result, fgs_result = future.result()
            results.append((i, airs_result, fgs_result))
        
    # Sort results by observation index (i) to maintain order
    results.sort(key=lambda x: x[0])
    
    # Store results in your existing arrays
    for result in results:
        i, airs_result, fgs_result = result
        AIRS_CH0_clean[i] = airs_result
        FGS1_clean[i] = fgs_result
        
    # 5. Get Correlated Double Sampling
    AIRS_cds = get_cds(AIRS_CH0_clean)
    FGS1_cds = get_cds(FGS1_clean)
    
    del AIRS_CH0_clean, FGS1_clean
    
    # 6. (Optional) Time Binning (to reduce space)
    if TIME_BINNING:
        AIRS_cds_binned = bin_obs(AIRS_cds, binning=30)
        FGS1_cds_binned = bin_obs(FGS1_cds, binning=30*12)
    else:
        AIRS_cds = AIRS_cds.transpose(0,1,3,2)
        AIRS_cds_binned = AIRS_cds
        FGS1_cds = FGS1_cds.transpose(0,1,3,2)
        FGS1_cds_binned = FGS1_cds
    
    del AIRS_cds, FGS1_cds

    # 7. Flat Field Correction - use pre-loaded calibration data
    for i in range(CHUNKS_SIZE):
        if DO_FLAT:
            flat_airs = calibration_data[index_chunk[i]]['airs_flat']  # Fixed: added [i]
            flat_fgs = calibration_data[index_chunk[i]]['fgs_flat']    # Fixed: added [i]
            dead_airs = calibration_data[index_chunk[i]]['airs_dead']  # Fixed: added [i]
            dead_fgs1 = calibration_data[index_chunk[i]]['fgs_dead']   # Fixed: added [i]
            
            corrected_AIRS_cds_binned = correct_flat_field(flat_airs, dead_airs, AIRS_cds_binned[i])
            AIRS_cds_binned[i] = corrected_AIRS_cds_binned
            corrected_FGS1_cds_binned = correct_flat_field(flat_fgs, dead_fgs1, FGS1_cds_binned[i])
            FGS1_cds_binned[i] = corrected_FGS1_cds_binned

    # Save data
    np.save(os.path.join(path_out, 'AIRS_clean_test_{}.npy'.format(n)), AIRS_cds_binned)
    np.save(os.path.join(path_out, 'FGS1_test_{}.npy'.format(n)), FGS1_cds_binned)
    del AIRS_cds_binned, FGS1_cds_binned, calibration_data

  0%|          | 0/1 [00:00<?, ?it/s]
Processing observations:   0%|          | 0/1 [00:00<?, ?it/s][A
Processing observations: 100%|██████████| 1/1 [00:04<00:00,  4.86s/it]
100%|██████████| 1/1 [00:06<00:00,  6.75s/it]


In [12]:
def load_data(file, chunk_size, nb_files): 
    data0 = np.load(file + '_0.npy')
    data_all = np.zeros((nb_files * chunk_size, data0.shape[1], data0.shape[2], data0.shape[3]))
    data_all[:chunk_size] = data0
    for i in range(1, nb_files): 
        data_all[i * chunk_size : (i+1) * chunk_size] = np.load(file + '_{}.npy'.format(i))
    return data_all 

data_train_AIRS = load_data(path_out + 'AIRS_clean_train', TRAIN_CHUNKS_SIZE, len(index)) 
data_train_FGS = load_data(path_out + 'FGS1_train', TRAIN_CHUNKS_SIZE, len(index))

print(data_train_AIRS.shape)
print(data_train_FGS.shape)

(1100, 187, 282, 32)
(1100, 187, 32, 32)


In [13]:
data_test_AIRS = load_data(path_out + 'AIRS_clean_test', CHUNKS_SIZE, 1) 
data_test_FGS = load_data(path_out + 'FGS1_test', CHUNKS_SIZE, 1)

print(data_test_AIRS.shape)
print(data_test_FGS.shape)

(1, 187, 282, 32)
(1, 187, 32, 32)


In [14]:
df_train = pd.read_csv(path_folder + 'train.csv')
df_train.set_index('planet_id', inplace=True)

df_train = df_train[df_train.index.isin(index[0])]

print(df_train.shape)
df_train.head()

(4, 283)


Unnamed: 0_level_0,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,wl_11,wl_12,wl_13,wl_14,wl_15,wl_16,wl_17,wl_18,wl_19,wl_20,wl_21,wl_22,wl_23,wl_24,wl_25,wl_26,wl_27,wl_28,wl_29,wl_30,wl_31,wl_32,wl_33,wl_34,wl_35,wl_36,wl_37,wl_38,wl_39,wl_40,wl_41,wl_42,wl_43,wl_44,wl_45,wl_46,wl_47,wl_48,wl_49,wl_50,wl_51,wl_52,wl_53,wl_54,wl_55,wl_56,wl_57,wl_58,wl_59,wl_60,wl_61,wl_62,wl_63,wl_64,wl_65,wl_66,wl_67,wl_68,wl_69,wl_70,wl_71,wl_72,wl_73,wl_74,wl_75,wl_76,wl_77,wl_78,wl_79,wl_80,wl_81,wl_82,wl_83,wl_84,wl_85,wl_86,wl_87,wl_88,wl_89,wl_90,wl_91,wl_92,wl_93,wl_94,wl_95,wl_96,wl_97,wl_98,wl_99,wl_100,wl_101,wl_102,wl_103,wl_104,wl_105,wl_106,wl_107,wl_108,wl_109,wl_110,wl_111,wl_112,wl_113,wl_114,wl_115,wl_116,wl_117,wl_118,wl_119,wl_120,wl_121,wl_122,wl_123,wl_124,wl_125,wl_126,wl_127,wl_128,wl_129,wl_130,wl_131,wl_132,wl_133,wl_134,wl_135,wl_136,wl_137,wl_138,wl_139,wl_140,wl_141,wl_142,wl_143,wl_144,wl_145,wl_146,wl_147,wl_148,wl_149,wl_150,wl_151,wl_152,wl_153,wl_154,wl_155,wl_156,wl_157,wl_158,wl_159,wl_160,wl_161,wl_162,wl_163,wl_164,wl_165,wl_166,wl_167,wl_168,wl_169,wl_170,wl_171,wl_172,wl_173,wl_174,wl_175,wl_176,wl_177,wl_178,wl_179,wl_180,wl_181,wl_182,wl_183,wl_184,wl_185,wl_186,wl_187,wl_188,wl_189,wl_190,wl_191,wl_192,wl_193,wl_194,wl_195,wl_196,wl_197,wl_198,wl_199,wl_200,wl_201,wl_202,wl_203,wl_204,wl_205,wl_206,wl_207,wl_208,wl_209,wl_210,wl_211,wl_212,wl_213,wl_214,wl_215,wl_216,wl_217,wl_218,wl_219,wl_220,wl_221,wl_222,wl_223,wl_224,wl_225,wl_226,wl_227,wl_228,wl_229,wl_230,wl_231,wl_232,wl_233,wl_234,wl_235,wl_236,wl_237,wl_238,wl_239,wl_240,wl_241,wl_242,wl_243,wl_244,wl_245,wl_246,wl_247,wl_248,wl_249,wl_250,wl_251,wl_252,wl_253,wl_254,wl_255,wl_256,wl_257,wl_258,wl_259,wl_260,wl_261,wl_262,wl_263,wl_264,wl_265,wl_266,wl_267,wl_268,wl_269,wl_270,wl_271,wl_272,wl_273,wl_274,wl_275,wl_276,wl_277,wl_278,wl_279,wl_280,wl_281,wl_282,wl_283
planet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1
34983,0.018291,0.018088,0.018087,0.018085,0.018084,0.018084,0.018084,0.018084,0.018085,0.018084,0.018083,0.01808,0.018076,0.018072,0.018068,0.018063,0.01806,0.01806,0.018062,0.018069,0.018079,0.018091,0.018103,0.018111,0.018115,0.018117,0.018119,0.018123,0.018129,0.018136,0.018139,0.018137,0.018133,0.018131,0.018131,0.018132,0.018133,0.018134,0.018135,0.018136,0.018137,0.018136,0.018134,0.018133,0.018132,0.018132,0.018131,0.018129,0.018127,0.018126,0.018126,0.018126,0.018124,0.018119,0.018115,0.018111,0.018108,0.018106,0.018104,0.018102,0.0181,0.018098,0.018096,0.018095,0.018095,0.018095,0.018095,0.018094,0.018092,0.01809,0.018089,0.018088,0.018087,0.018084,0.018082,0.01808,0.018079,0.018079,0.01808,0.01808,0.018079,0.018076,0.018074,0.018073,0.018076,0.018082,0.018093,0.018105,0.018116,0.018124,0.01813,0.018134,0.018136,0.018136,0.018136,0.018137,0.018137,0.018137,0.018138,0.01814,0.018144,0.018147,0.018148,0.018148,0.01815,0.018152,0.018154,0.018156,0.018157,0.018157,0.018158,0.018158,0.018158,0.018157,0.018158,0.018159,0.01816,0.018161,0.018162,0.018163,0.018164,0.018167,0.018169,0.018171,0.018171,0.018171,0.01817,0.01817,0.018169,0.018168,0.018167,0.018167,0.018169,0.018173,0.018174,0.018171,0.018167,0.018165,0.018164,0.018164,0.018164,0.018163,0.018162,0.018162,0.018161,0.01816,0.018159,0.01816,0.018161,0.018163,0.018165,0.018166,0.018168,0.018171,0.018173,0.018174,0.018176,0.018178,0.018178,0.018175,0.018173,0.018171,0.01817,0.018169,0.018168,0.018166,0.018165,0.018165,0.018163,0.018159,0.018155,0.018154,0.018152,0.018148,0.018144,0.018142,0.018142,0.018146,0.018155,0.018172,0.018192,0.018207,0.018211,0.018208,0.0182,0.01819,0.018179,0.018169,0.018161,0.018156,0.018152,0.01815,0.018147,0.018147,0.01815,0.018154,0.018156,0.018157,0.018157,0.018157,0.018156,0.018155,0.018155,0.018154,0.018153,0.018153,0.018154,0.018155,0.018155,0.018153,0.01815,0.018148,0.018148,0.018148,0.018148,0.018148,0.018148,0.018146,0.018145,0.018145,0.018144,0.018144,0.018143,0.018139,0.018136,0.018135,0.018133,0.018129,0.018125,0.018123,0.018121,0.01812,0.01812,0.018119,0.018117,0.018115,0.018113,0.018112,0.01811,0.018108,0.018106,0.018104,0.018103,0.018101,0.0181,0.018101,0.0181,0.018098,0.018098,0.018097,0.018095,0.018094,0.018094,0.018094,0.018095,0.018096,0.018097,0.018098,0.018097,0.018097,0.018096,0.018096,0.018096,0.018095,0.018096,0.018096,0.018096,0.018097,0.018097,0.018098,0.018099,0.018101,0.018106,0.018109,0.018112,0.018118,0.018123,0.018125,0.018127,0.01813,0.018134,0.018138,0.018142
1873185,0.006347,0.006343,0.006343,0.006343,0.006343,0.006343,0.006343,0.006342,0.006342,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.006341,0.006342,0.006342,0.006342,0.006341,0.006341,0.00634,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.006339,0.00634,0.006341,0.006342,0.006343,0.006343,0.006342,0.006342,0.006342,0.006341,0.006341,0.006341,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006341,0.006341,0.00634,0.00634,0.00634,0.006341,0.006341,0.00634,0.00634,0.00634,0.006339,0.00634,0.00634,0.00634,0.00634,0.006341,0.006341,0.006341,0.006341,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.006339,0.006339,0.00634,0.00634,0.00634,0.00634,0.00634,0.006341,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006342,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.006339,0.00634,0.00634,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.006341,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.00634,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.006338,0.006338,0.006338,0.006338,0.006339,0.006338,0.006338,0.006339,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006337,0.006337,0.006337,0.006337,0.006337,0.006338,0.006338,0.006338,0.006338,0.006338,0.006338,0.006337,0.006337,0.006338,0.006338,0.006337,0.006337,0.006338,0.006338,0.006338,0.006338,0.006338,0.006337,0.006337,0.006337,0.006337,0.006337,0.006337,0.006337,0.006338,0.006339,0.006341,0.006341,0.006341,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006338,0.006338,0.006338,0.006338,0.006339,0.006341,0.006341,0.006341,0.006341,0.006341,0.00634,0.00634,0.00634,0.00634,0.00634,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339,0.006339
3849793,0.046061,0.046139,0.04613,0.046117,0.046107,0.046105,0.046109,0.046112,0.046111,0.046104,0.046095,0.046088,0.046082,0.046078,0.046075,0.046076,0.046078,0.046079,0.04608,0.046081,0.046083,0.046086,0.04609,0.046095,0.046103,0.046111,0.046121,0.046137,0.046164,0.046198,0.046225,0.046234,0.046228,0.046225,0.046235,0.046254,0.046269,0.046278,0.046288,0.0463,0.04631,0.04631,0.046302,0.046296,0.046302,0.046321,0.046341,0.046352,0.046353,0.046354,0.046357,0.046353,0.046329,0.046294,0.046267,0.046255,0.046251,0.046247,0.046244,0.046241,0.046234,0.04622,0.046202,0.046187,0.046182,0.046188,0.046198,0.0462,0.046191,0.046174,0.04616,0.046154,0.04615,0.046145,0.046146,0.046155,0.046164,0.046163,0.046156,0.046157,0.046168,0.046177,0.046176,0.046178,0.046194,0.046219,0.046241,0.046252,0.046261,0.046289,0.046331,0.046362,0.046359,0.046333,0.046312,0.046302,0.04629,0.046273,0.046268,0.046289,0.046324,0.046339,0.046318,0.04628,0.046257,0.046266,0.046299,0.046327,0.046321,0.046293,0.046277,0.046278,0.046271,0.046247,0.046222,0.04621,0.04621,0.046211,0.046201,0.04618,0.046156,0.046136,0.046124,0.04612,0.04612,0.046117,0.046116,0.046127,0.04614,0.04614,0.046131,0.046129,0.046136,0.046141,0.046136,0.046127,0.046122,0.046127,0.046138,0.046151,0.046166,0.046183,0.046199,0.046211,0.046216,0.046221,0.046235,0.046262,0.046291,0.046315,0.046339,0.046373,0.046418,0.046461,0.046486,0.046492,0.046496,0.046506,0.046509,0.046497,0.046483,0.046481,0.046497,0.046515,0.046515,0.046505,0.046498,0.046487,0.046468,0.046455,0.046456,0.046455,0.046439,0.046421,0.046406,0.046392,0.04638,0.046392,0.046447,0.046545,0.046651,0.04672,0.046737,0.046717,0.046675,0.046622,0.046565,0.046517,0.046488,0.046471,0.046453,0.046434,0.046419,0.046413,0.046423,0.046442,0.046454,0.046461,0.046465,0.046463,0.046458,0.046452,0.046449,0.046442,0.046432,0.046437,0.046451,0.046456,0.046456,0.046445,0.046427,0.046417,0.046411,0.04641,0.046415,0.046423,0.046432,0.04643,0.04641,0.046394,0.046383,0.046364,0.046355,0.046357,0.046356,0.046351,0.046341,0.046326,0.046316,0.046309,0.0463,0.046291,0.046287,0.046282,0.046273,0.04626,0.046251,0.046252,0.046253,0.04624,0.046224,0.046225,0.046232,0.046225,0.046219,0.046217,0.046203,0.046193,0.046196,0.046189,0.046175,0.046169,0.046168,0.046169,0.046162,0.046156,0.046166,0.046179,0.046182,0.046171,0.046156,0.046153,0.04616,0.046158,0.04615,0.046148,0.046147,0.046142,0.046136,0.046131,0.046125,0.04613,0.046143,0.046144,0.046133,0.046131,0.046138,0.046141,0.046147,0.046147,0.046139,0.046134,0.046133
8456603,0.015363,0.015387,0.015385,0.015385,0.015385,0.015385,0.015384,0.015383,0.015383,0.015384,0.015385,0.015385,0.015386,0.015387,0.015388,0.01539,0.015393,0.015396,0.015399,0.015403,0.015408,0.015412,0.015417,0.015421,0.015426,0.015433,0.01544,0.01545,0.015462,0.015475,0.015485,0.01549,0.015491,0.015492,0.015496,0.015502,0.015508,0.015511,0.015512,0.015514,0.015515,0.015516,0.015516,0.015517,0.015519,0.015521,0.015525,0.015527,0.015528,0.015528,0.015526,0.015525,0.015522,0.015518,0.015515,0.015511,0.015508,0.015504,0.0155,0.015496,0.015493,0.01549,0.015487,0.015484,0.01548,0.015477,0.015473,0.015468,0.015463,0.015459,0.015457,0.015455,0.015453,0.015451,0.015449,0.015449,0.01545,0.01545,0.015448,0.015445,0.015444,0.015443,0.015443,0.015442,0.015441,0.01544,0.015437,0.015434,0.015431,0.015428,0.015425,0.015422,0.015418,0.015414,0.015412,0.01541,0.015408,0.015406,0.015404,0.015404,0.015407,0.01541,0.015412,0.015411,0.015409,0.015408,0.015409,0.015411,0.01541,0.015407,0.015403,0.015399,0.015397,0.015397,0.015397,0.015398,0.015398,0.015398,0.015399,0.015402,0.015405,0.015407,0.015406,0.015404,0.015402,0.015402,0.015405,0.015409,0.015413,0.015416,0.015418,0.01542,0.015425,0.015431,0.015435,0.015439,0.015444,0.015451,0.015459,0.015467,0.015477,0.01549,0.015502,0.015512,0.015517,0.015521,0.015526,0.015534,0.015545,0.015554,0.015558,0.015559,0.015562,0.015567,0.015571,0.015572,0.015575,0.015579,0.015583,0.015584,0.015585,0.015586,0.015588,0.01559,0.015589,0.015585,0.015581,0.015578,0.015576,0.015575,0.015575,0.015573,0.01557,0.015565,0.01556,0.015556,0.015554,0.015557,0.015569,0.015592,0.015615,0.015627,0.015631,0.01563,0.015627,0.015619,0.015609,0.0156,0.015592,0.015585,0.015579,0.015574,0.01557,0.015567,0.015567,0.015569,0.01557,0.015572,0.015574,0.015574,0.015573,0.015573,0.015574,0.015571,0.015569,0.01557,0.015573,0.015574,0.015576,0.015576,0.015576,0.015574,0.015574,0.015574,0.015572,0.015567,0.015565,0.015566,0.015566,0.015565,0.015562,0.015559,0.015555,0.015551,0.01555,0.01555,0.01555,0.015549,0.015547,0.015545,0.015543,0.015541,0.015539,0.015537,0.015535,0.015534,0.015532,0.01553,0.015529,0.015526,0.015521,0.015518,0.015516,0.015513,0.01551,0.015508,0.015508,0.015506,0.015505,0.015505,0.015504,0.015501,0.015497,0.015496,0.015494,0.015493,0.015493,0.015494,0.015491,0.015486,0.015484,0.015484,0.015482,0.01548,0.015482,0.015481,0.015478,0.015477,0.015475,0.015472,0.01547,0.015471,0.01547,0.015471,0.015471,0.015467,0.015465,0.015465,0.015464,0.015461,0.01546,0.01546,0.01546


## Split into train and validation sets

In [15]:
n = round(.8 * len(df_train))
n

3

In [16]:
train_AIRS = data_train_AIRS[:n]
val_AIRS = data_train_AIRS[n:]
print(len(train_AIRS), len(val_AIRS))

train_FGS = data_train_FGS[:n]
val_FGS = data_train_FGS[n:]
print(len(train_FGS), len(val_FGS))

train_labels = df_train.iloc[:n,:] 
val_labels = df_train.iloc[n:,:]
print(train_labels.shape, val_labels.shape)

3 1097
3 1097
(3, 283) (1, 283)


## Define the model

In [17]:
print(train_AIRS.shape)
print(train_labels.shape)

(3, 187, 282, 32)
(3, 283)


In [18]:
inputs = Input(shape=(187, 282, 32), name='inputs')

x = Conv2D(32, (3, 3), activation='relu')(inputs)
x = MaxPooling2D((2, 2))(x)

x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)

x = Conv2D(64, (3, 3), activation='relu')(x)
x = Flatten()(x)

x = Dense(64, activation='relu')(x)

# Two output heads
mean_output = Dense(283, activation='linear', name='mean')(x)  
log_std_output = Dense(283, activation='linear', name='log_std')(x) 
std_output = Lambda(lambda x: tf.exp(0.5 * x), name='std')(log_std_output)

# Concatenate outputs for submission
outputs = Concatenate(name='outputs')([mean_output, std_output])

model = Model(inputs=inputs, outputs=outputs)
model.summary()

I0000 00:00:1759035224.654307      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


## Compile and train the model

In [19]:
def nll_loss(y_true, y_pred):
    mu, std = y_pred[:, :283], y_pred[:, 283:]
    return tf.reduce_mean(0.5 * tf.math.log(2 * np.pi * std**2) + 0.5 * ((y_true - mu)**2 / std**2))

model.compile(optimizer = 'adam', loss = nll_loss)

## Generate predictions

In [20]:
# Generate predictions
predictions = model.predict(data_test_AIRS)  
means = predictions[:, :283]  
stds = predictions[:, 283:] 

# Create submission DataFrame
df_sample = pd.read_csv(path_folder + 'sample_submission.csv')
df_submission = pd.DataFrame(columns = df_sample.columns)  
df_submission['planet_id'] = test_index  
df_submission.iloc[:, 1:284] = means  
df_submission.iloc[:, 284:] = stds   

# Replace inf and NaN values
df_submission = df_submission.replace([np.inf, -np.inf], np.nan)
df_submission = df_submission.fillna(0)

# Verify
print(df_submission.shape) 
df_submission.head()

I0000 00:00:1759035227.696369    1174 service.cc:148] XLA service 0x78cdd8005cd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759035227.696955    1174 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1759035227.816730    1174 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


I0000 00:00:1759035229.246459    1174 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
  df_submission = df_submission.replace([np.inf, -np.inf], np.nan)


(1, 567)


Unnamed: 0,planet_id,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,wl_11,wl_12,wl_13,wl_14,wl_15,wl_16,wl_17,wl_18,wl_19,wl_20,wl_21,wl_22,wl_23,wl_24,wl_25,wl_26,wl_27,wl_28,wl_29,wl_30,wl_31,wl_32,wl_33,wl_34,wl_35,wl_36,wl_37,wl_38,wl_39,wl_40,wl_41,wl_42,wl_43,wl_44,wl_45,wl_46,wl_47,wl_48,wl_49,wl_50,wl_51,wl_52,wl_53,wl_54,wl_55,wl_56,wl_57,wl_58,wl_59,wl_60,wl_61,wl_62,wl_63,wl_64,wl_65,wl_66,wl_67,wl_68,wl_69,wl_70,wl_71,wl_72,wl_73,wl_74,wl_75,wl_76,wl_77,wl_78,wl_79,wl_80,wl_81,wl_82,wl_83,wl_84,wl_85,wl_86,wl_87,wl_88,wl_89,wl_90,wl_91,wl_92,wl_93,wl_94,wl_95,wl_96,wl_97,wl_98,wl_99,wl_100,wl_101,wl_102,wl_103,wl_104,wl_105,wl_106,wl_107,wl_108,wl_109,wl_110,wl_111,wl_112,wl_113,wl_114,wl_115,wl_116,wl_117,wl_118,wl_119,wl_120,wl_121,wl_122,wl_123,wl_124,wl_125,wl_126,wl_127,wl_128,wl_129,wl_130,wl_131,wl_132,wl_133,wl_134,wl_135,wl_136,wl_137,wl_138,wl_139,wl_140,wl_141,wl_142,wl_143,wl_144,wl_145,wl_146,wl_147,wl_148,wl_149,wl_150,wl_151,wl_152,wl_153,wl_154,wl_155,wl_156,wl_157,wl_158,wl_159,wl_160,wl_161,wl_162,wl_163,wl_164,wl_165,wl_166,wl_167,wl_168,wl_169,wl_170,wl_171,wl_172,wl_173,wl_174,wl_175,wl_176,wl_177,wl_178,wl_179,wl_180,wl_181,wl_182,wl_183,wl_184,wl_185,wl_186,wl_187,wl_188,wl_189,wl_190,wl_191,wl_192,wl_193,wl_194,wl_195,wl_196,wl_197,wl_198,wl_199,wl_200,wl_201,wl_202,wl_203,wl_204,wl_205,wl_206,wl_207,wl_208,wl_209,wl_210,wl_211,wl_212,wl_213,wl_214,wl_215,wl_216,wl_217,wl_218,wl_219,wl_220,wl_221,wl_222,wl_223,wl_224,wl_225,wl_226,wl_227,wl_228,wl_229,wl_230,wl_231,wl_232,wl_233,wl_234,wl_235,wl_236,wl_237,wl_238,wl_239,wl_240,wl_241,wl_242,wl_243,wl_244,wl_245,wl_246,wl_247,wl_248,wl_249,wl_250,wl_251,wl_252,wl_253,wl_254,wl_255,wl_256,wl_257,wl_258,wl_259,wl_260,wl_261,wl_262,wl_263,wl_264,wl_265,wl_266,wl_267,wl_268,wl_269,wl_270,wl_271,wl_272,wl_273,wl_274,wl_275,wl_276,wl_277,wl_278,wl_279,wl_280,wl_281,wl_282,wl_283,sigma_1,sigma_2,sigma_3,sigma_4,sigma_5,sigma_6,sigma_7,sigma_8,sigma_9,sigma_10,sigma_11,sigma_12,sigma_13,sigma_14,sigma_15,sigma_16,sigma_17,sigma_18,sigma_19,sigma_20,sigma_21,sigma_22,sigma_23,sigma_24,sigma_25,sigma_26,sigma_27,sigma_28,sigma_29,sigma_30,sigma_31,sigma_32,sigma_33,sigma_34,sigma_35,sigma_36,sigma_37,sigma_38,sigma_39,sigma_40,sigma_41,sigma_42,sigma_43,sigma_44,sigma_45,sigma_46,sigma_47,sigma_48,sigma_49,sigma_50,sigma_51,sigma_52,sigma_53,sigma_54,sigma_55,sigma_56,sigma_57,sigma_58,sigma_59,sigma_60,sigma_61,sigma_62,sigma_63,sigma_64,sigma_65,sigma_66,sigma_67,sigma_68,sigma_69,sigma_70,sigma_71,sigma_72,sigma_73,sigma_74,sigma_75,sigma_76,sigma_77,sigma_78,sigma_79,sigma_80,sigma_81,sigma_82,sigma_83,sigma_84,sigma_85,sigma_86,sigma_87,sigma_88,sigma_89,sigma_90,sigma_91,sigma_92,sigma_93,sigma_94,sigma_95,sigma_96,sigma_97,sigma_98,sigma_99,sigma_100,sigma_101,sigma_102,sigma_103,sigma_104,sigma_105,sigma_106,sigma_107,sigma_108,sigma_109,sigma_110,sigma_111,sigma_112,sigma_113,sigma_114,sigma_115,sigma_116,sigma_117,sigma_118,sigma_119,sigma_120,sigma_121,sigma_122,sigma_123,sigma_124,sigma_125,sigma_126,sigma_127,sigma_128,sigma_129,sigma_130,sigma_131,sigma_132,sigma_133,sigma_134,sigma_135,sigma_136,sigma_137,sigma_138,sigma_139,sigma_140,sigma_141,sigma_142,sigma_143,sigma_144,sigma_145,sigma_146,sigma_147,sigma_148,sigma_149,sigma_150,sigma_151,sigma_152,sigma_153,sigma_154,sigma_155,sigma_156,sigma_157,sigma_158,sigma_159,sigma_160,sigma_161,sigma_162,sigma_163,sigma_164,sigma_165,sigma_166,sigma_167,sigma_168,sigma_169,sigma_170,sigma_171,sigma_172,sigma_173,sigma_174,sigma_175,sigma_176,sigma_177,sigma_178,sigma_179,sigma_180,sigma_181,sigma_182,sigma_183,sigma_184,sigma_185,sigma_186,sigma_187,sigma_188,sigma_189,sigma_190,sigma_191,sigma_192,sigma_193,sigma_194,sigma_195,sigma_196,sigma_197,sigma_198,sigma_199,sigma_200,sigma_201,sigma_202,sigma_203,sigma_204,sigma_205,sigma_206,sigma_207,sigma_208,sigma_209,sigma_210,sigma_211,sigma_212,sigma_213,sigma_214,sigma_215,sigma_216,sigma_217,sigma_218,sigma_219,sigma_220,sigma_221,sigma_222,sigma_223,sigma_224,sigma_225,sigma_226,sigma_227,sigma_228,sigma_229,sigma_230,sigma_231,sigma_232,sigma_233,sigma_234,sigma_235,sigma_236,sigma_237,sigma_238,sigma_239,sigma_240,sigma_241,sigma_242,sigma_243,sigma_244,sigma_245,sigma_246,sigma_247,sigma_248,sigma_249,sigma_250,sigma_251,sigma_252,sigma_253,sigma_254,sigma_255,sigma_256,sigma_257,sigma_258,sigma_259,sigma_260,sigma_261,sigma_262,sigma_263,sigma_264,sigma_265,sigma_266,sigma_267,sigma_268,sigma_269,sigma_270,sigma_271,sigma_272,sigma_273,sigma_274,sigma_275,sigma_276,sigma_277,sigma_278,sigma_279,sigma_280,sigma_281,sigma_282,sigma_283
0,[1103775],32405.324219,-14072.595703,-8842.513672,17101.6875,64343.96875,40036.089844,5280.583008,-7124.664062,-33879.09375,41671.296875,-50435.875,-30194.919922,-16417.863281,18781.792969,40990.640625,-17938.027344,-18846.070312,-27272.421875,-18916.203125,-5359.64502,13698.466797,14731.289062,-20081.53125,-4671.774414,52484.367188,33724.964844,-8003.621094,44786.648438,-33809.015625,31775.824219,-2735.892578,-15520.853516,-21808.464844,53383.085938,7486.116211,-13970.875,27181.775391,-9788.988281,-28635.59375,38074.988281,-20397.90625,-24976.566406,-45266.671875,6990.03418,-36610.640625,-4506.371094,66696.0,-2245.469727,35547.117188,63612.480469,42799.589844,49408.109375,17242.964844,-26570.171875,-23354.957031,15003.535156,-22296.703125,-39970.347656,48318.253906,17167.361328,18922.839844,-41244.929688,14026.004883,-4893.797852,-5768.361328,-1743.837891,3994.205078,-14993.135742,-4371.292969,50783.558594,15838.493164,61402.917969,-59061.792969,-22524.011719,-51663.132812,-22080.382812,14640.043945,21688.363281,-35003.234375,47046.96875,42680.324219,26516.289062,24112.908203,-2727.915771,44718.03125,-23530.59375,-1079.392578,-33139.101562,-41763.179688,41569.839844,25238.382812,-15176.318359,-23338.542969,38713.699219,17840.273438,28287.65625,13433.046875,-28004.695312,-21134.1875,-44760.578125,444.905273,-51070.34375,1620.472656,30890.492188,-20497.078125,69795.953125,39356.234375,-31371.396484,-4971.8125,9008.047852,4092.878906,34393.0,-40025.40625,-18013.845703,-6352.96875,-29322.082031,1497.807129,29257.542969,15454.594727,60424.289062,20112.228516,51891.609375,84994.523438,-8398.849609,15060.941406,-55490.09375,-24271.433594,-24454.044922,-13899.306641,-20768.439453,-28385.349609,15432.728516,50489.820312,54585.929688,-569.980469,-17937.03125,32897.375,34486.601562,3148.344727,-12516.695312,-38007.773438,15777.222656,-21471.933594,-26017.914062,-11813.496094,6470.041016,-4386.063477,-2983.484375,-32462.556641,39272.09375,20415.724609,-46466.605469,-24186.773438,18667.203125,-50366.5,-14038.441406,-6387.921875,-34343.542969,51860.625,-26700.673828,51930.742188,18194.326172,4108.738281,-14308.957031,-46091.445312,-7357.246094,19782.121094,20938.171875,-10774.076172,43971.914062,42401.335938,-22057.289062,-15406.21582,46671.734375,-36089.964844,17383.080078,48711.8125,52718.175781,22815.792969,-27754.585938,-5330.438477,-24691.960938,34872.128906,34467.488281,-17954.976562,8051.277344,-33220.109375,-21836.921875,-54857.609375,-21359.384766,10808.326172,8473.292969,-13836.967773,17607.203125,-57937.867188,489.075195,21361.984375,-11332.878906,49721.515625,12975.334961,5047.832031,-16351.242188,21799.320312,42580.011719,-39746.226562,-39600.421875,-26925.953125,-26781.185547,7661.290039,-23661.111328,-456.917969,58364.703125,-76046.890625,30688.957031,-28925.222656,-53700.5,-28119.560547,-34099.871094,8886.900391,28582.816406,16481.476562,13511.949219,-11630.319336,13905.859375,27438.125,30156.048828,-14371.445312,-52383.101562,-51823.242188,-13191.258789,-46128.539062,-20365.582031,-62600.355469,17068.539062,-74946.921875,-44284.804688,-5891.355469,-63666.679688,-32247.998047,12778.575195,-18172.285156,23785.386719,-49052.375,18664.515625,-9632.886719,23270.484375,-30534.412109,41899.003906,-8153.959961,-10770.285156,1780.447754,3047.074219,1986.847656,9907.178711,6000.87207,32962.992188,16501.865234,6650.1875,-16427.308594,-23533.449219,-39355.855469,20105.519531,26341.625,-12949.861328,729.546387,-6110.4375,9566.420898,-28748.050781,16585.605469,17627.332031,3391.730957,-27589.179688,-28394.619141,-11965.462891,32802.167969,3700.070312,15125.621094,-8465.359375,40016.617188,30725.486328,-20667.609375,13596.390625,25490.734375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.807814e+38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
df_submission.to_csv('/kaggle/working/submission.csv', index=False)