In [None]:
import numpy as np
import pandas as pd
import os 
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import entropy

In [None]:
def vector(data, start, end , start1 , end1):
    """Summarise a 1-D signal as a 12-element feature vector.

    The four indices cut ``data`` into five consecutive segments:
    ``[:start]``, ``[start:end]``, ``[end:start1]``, ``[start1:end1]`` and
    ``[end1:]``. The mean of the first and last segment means is used as the
    baseline level.

    Parameters
    ----------
    data : array-like
        Signal to describe (indexed with integers and slices).
    start, end : int
        Bounds of the second segment; also the two points of the first slope.
    start1, end1 : int
        Bounds of the fourth segment; also the two points of the second slope.

    Returns
    -------
    numpy.ndarray
        ``[slope, slope1, reduction1, reduction2, reduction3,
        nr, nr1, nr2, nr3, nr4, nr5, skewness]`` where

        * ``slope`` / ``slope1`` are the secant slopes between
          ``data[start]``/``data[end]`` and ``data[start1]``/``data[end1]``;
        * ``reductionK`` is the relative drop of the mean of segments 2-4
          with respect to the baseline;
        * ``nr`` is mean/std of the whole signal, ``nr1``..``nr5`` the same
          ratio for each of the five segments;
        * ``skewness`` is ``scipy.stats.skew(data)``.
    """
    segments = (
        data[:start],
        data[start:end],
        data[end:start1],
        data[start1:end1],
        data[end1:],
    )
    segment_means = [np.mean(segment) for segment in segments]

    # Baseline level: average of the two outermost segment means.
    baseline = (segment_means[0] + segment_means[-1]) / 2

    # Relative drop of the three inner segments with respect to the baseline.
    reductions = [(baseline - inner_mean) / baseline for inner_mean in segment_means[1:4]]

    # Secant slope across each of the two transition windows.
    slopes = [
        (data[hi] - data[lo]) / (hi - lo)
        for lo, hi in ((start, end), (start1, end1))
    ]

    # Mean-to-std ratio: whole signal first, then each segment in order.
    ratios = [np.mean(data) / np.std(data)]
    ratios.extend(
        segment_mean / np.std(segment)
        for segment_mean, segment in zip(segment_means, segments)
    )

    return np.array([*slopes, *reductions, *ratios, skew(data)])


In [None]:
from scipy.signal import savgol_filter

def smooth_data(data, window_size):
    """Smooth ``data`` with a third-order Savitzky-Golay filter.

    ``window_size`` is passed straight through as the filter's window length;
    the smoothed array returned by ``savgol_filter`` is handed back unchanged.
    """
    polynomial_order = 3
    return savgol_filter(data, window_length=window_size, polyorder=polynomial_order)

def optimize_breakpoint(data, initial_breakpoint, window_size, buffer_size, smooth_window):
    """Search around ``initial_breakpoint`` for the best-scoring breakpoint.

    Every candidate ``initial_breakpoint + i`` with ``i`` in
    ``[-window_size, window_size)`` is paired with its mirror image about the
    centre of the signal (``2 * (len(data) // 2) - candidate``). The pair
    splits ``data`` into three regions (before, between, after), each trimmed
    by ``buffer_size`` samples on the side of a breakpoint.

    A candidate's score is::

        |mean(r1) - mean(r2)| + |mean(r2) - mean(r3)|
        - 0.5 * (var(r1) + var(r2) + var(r3))
        + mean range of the smoothed signal inside the two breakpoint windows

    Parameters
    ----------
    data : array-like
        1-D signal to segment.
    initial_breakpoint : int
        Centre of the search; also the value returned when no candidate
        satisfies the bounds check.
    window_size : int
        Half-width of the search interval.
    buffer_size : int
        Half-width of the window cut out around each breakpoint.
    smooth_window : int
        Window length passed to ``smooth_data``.

    Returns
    -------
    int
        The candidate with the highest score (the earliest one on ties).
    """
    best_breakpoint = initial_breakpoint
    best_score = float("-inf")
    midpoint = len(data) // 2
    smoothed_data = smooth_data(data, smooth_window)

    for offset in range(-window_size, window_size):
        new_breakpoint = initial_breakpoint + offset

        # Only candidates that leave room for a buffer on both sides and stay
        # in the first half of the signal are considered.
        if not (buffer_size < new_breakpoint < midpoint - buffer_size):
            continue

        # Second breakpoint: mirror image of the first about the centre.
        mirrored_breakpoint = 2 * midpoint - new_breakpoint

        region1 = data[: new_breakpoint - buffer_size]
        region2 = data[new_breakpoint + buffer_size : mirrored_breakpoint - buffer_size]
        region3 = data[mirrored_breakpoint + buffer_size :]

        breakpoint_region1 = smoothed_data[
            new_breakpoint - buffer_size : new_breakpoint + buffer_size
        ]
        # Fix: this window used to be a verbatim copy of breakpoint_region1,
        # so the mirrored breakpoint never contributed to the score.
        breakpoint_region2 = smoothed_data[
            mirrored_breakpoint - buffer_size : mirrored_breakpoint + buffer_size
        ]

        mean_diff = abs(np.mean(region1) - np.mean(region2)) + abs(
            np.mean(region2) - np.mean(region3)
        )
        var_sum = np.var(region1) + np.var(region2) + np.var(region3)
        range_at_breakpoint1 = np.max(breakpoint_region1) - np.min(breakpoint_region1)
        range_at_breakpoint2 = np.max(breakpoint_region2) - np.min(breakpoint_region2)
        mean_range_at_breakpoint = (range_at_breakpoint1 + range_at_breakpoint2) / 2

        score = mean_diff - 0.5 * var_sum + mean_range_at_breakpoint

        # Strict comparison: the first candidate reaching the best score wins.
        if score > best_score:
            best_score = score
            best_breakpoint = new_breakpoint

    return best_breakpoint

In [None]:
# import torch
# import torch.nn as nn

# class WavelengthPredictor(nn.Module):
#     def __init__(self):
#         super(WavelengthPredictor, self).__init__()
#         self.model = nn.Sequential(
#             # Initial expansion from 24 to wider dimensions
#             nn.Linear(24, 64),
#             nn.BatchNorm1d(64),
#             nn.ReLU(),
#             nn.Linear(64, 128),
            
#             # First dense block
#             nn.Linear(128, 256),
#             nn.BatchNorm1d(256),
#             nn.ReLU(),
#             nn.Linear(256, 512),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
            
#             # Second dense block with residual-like double linear
#             nn.Linear(512, 512),
#             nn.Linear(512, 512),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
            
#             # Third dense block
#             nn.Linear(512, 768),
#             nn.BatchNorm1d(768),
#             nn.ReLU(),
#             nn.Linear(768, 512),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
            
#             # Fourth dense block
#             nn.Linear(512, 384),
#             nn.BatchNorm1d(384),
#             nn.ReLU(),
#             nn.Linear(384, 384),
#             nn.BatchNorm1d(384),
#             nn.ReLU(),
            
#             # Final contraction to target dimension
#             nn.Linear(384, 283)
#         )
        
#         # Initialize weights for better gradient flow
#         self._initialize_weights()
    
#     def _initialize_weights(self):
#         for m in self.modules():
#             if isinstance(m, nn.Linear):
#                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
#                 if m.bias is not None:
#                     nn.init.constant_(m.bias, 0)
#             elif isinstance(m, nn.BatchNorm1d):
#                 nn.init.constant_(m.weight, 1)
#                 nn.init.constant_(m.bias, 0)
    
#     def forward(self, x):
#         return self.model(x)
    

In [None]:
class WavelengthPredictor(nn.Module):
    """Fully connected network mapping 24 input features to 283 outputs.

    The hidden widths grow 24 -> 48 -> 96 -> 192 -> 256; each hidden stage is
    ``Linear -> BatchNorm1d -> ReLU -> Dropout``. A final ``Linear`` layer
    projects the 256 hidden units to the 283 outputs.

    Parameters
    ----------
    dropout_rate : float
        Drop probability used by every ``Dropout`` layer.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()

        widths = (24, 48, 96, 192, 256)
        stages = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]
        # Output projection: no normalisation, activation or dropout after it.
        stages.append(nn.Linear(widths[-1], 283))

        self.model = nn.Sequential(*stages)
        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-normal weights and zero biases for Linear layers; BatchNorm set to identity."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm1d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.xavier_normal_(module.weight)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.model(x)

In [None]:
def planetnumber(filename):
    """Return the integer that precedes the first underscore in ``filename``."""
    leading_part, _, _ = filename.partition('_')
    return int(leading_part)

class ARIEL(Dataset):
    """Dataset of per-planet feature vectors built from AIRS and FGS signals.

    Each item loads one ``.npy`` file from ``airs_dir`` and one from
    ``fgs_dir``, min-max normalises each signal, locates its breakpoints with
    ``optimize_breakpoint`` and summarises it with ``vector``. The two feature
    vectors are concatenated (AIRS first) and returned together with the
    planet's row from the labels table.

    Parameters
    ----------
    airs_dir, fgs_dir : str
        Directories whose file names start with ``<planet_id>_``.
    start, end : int
        Slice bounds applied to the planet-id-sorted file listings.
    labels_path : str, optional
        CSV with a ``planet_id`` column followed by the target columns.
        It is read once at construction time rather than once per sample.

    Raises
    ------
    ValueError
        If the selected AIRS and FGS files do not refer to the same planets
        in the same order.
    """

    # Settings of the breakpoint search, shared by every sample.
    INITIAL_BREAKPOINT = 900
    BUFFER_SIZE = 80
    WINDOW_SIZE = 300
    AIRS_SMOOTH_WINDOW = 200
    FGS_SMOOTH_WINDOW = 250

    def __init__(self, airs_dir, fgs_dir , start , end ,
                 labels_path="/kaggle/input/ariel-data-challenge-2024/train_labels.csv"):
        self.airs_dir = airs_dir
        self.fgs_dir = fgs_dir

        self.airs_list = sorted(os.listdir(self.airs_dir), key=planetnumber)[start:end]
        self.fgs_list = sorted(os.listdir(self.fgs_dir), key=planetnumber)[start:end]

        # The two listings are sorted and sliced independently, so make sure
        # index i really addresses the same planet in both of them.
        airs_ids = [planetnumber(name) for name in self.airs_list]
        fgs_ids = [planetnumber(name) for name in self.fgs_list]
        if airs_ids != fgs_ids:
            raise ValueError(
                "AIRS and FGS file listings do not contain the same planets "
                f"in the selected range [{start}:{end}]"
            )

        # Load the labels once; __getitem__ only filters this cached frame.
        self.labels = pd.read_csv(labels_path)

    def _features(self, signal, smooth_window):
        """Min-max normalise ``signal`` and return its ``vector`` features."""
        signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))

        buffer_size = self.BUFFER_SIZE
        breakpoint_ = optimize_breakpoint(
            signal,
            self.INITIAL_BREAKPOINT,
            window_size=self.WINDOW_SIZE,
            buffer_size=buffer_size,
            smooth_window=smooth_window,
        )
        # Second breakpoint is the mirror image of the first about the centre.
        mirrored = 2 * (len(signal) // 2) - breakpoint_

        return vector(
            signal,
            breakpoint_ - buffer_size,
            breakpoint_ + buffer_size,
            mirrored - buffer_size,
            mirrored + buffer_size,
        )

    def __getitem__(self, index):
        """Return ``(input_vector, output, planet)`` for the sample at ``index``.

        ``input_vector`` is the AIRS features followed by the FGS features,
        ``output`` is the planet's label row without its first column, and
        ``planet`` is the integer id parsed from the AIRS file name.
        """
        airs_name = self.airs_list[index]
        fgs_name = self.fgs_list[index]
        planet = planetnumber(airs_name)

        airs_arr = np.load(os.path.join(self.airs_dir, airs_name))
        fgs_arr = np.load(os.path.join(self.fgs_dir, fgs_name))

        input_vector = np.concatenate((
            self._features(airs_arr, self.AIRS_SMOOTH_WINDOW),
            self._features(fgs_arr, self.FGS_SMOOTH_WINDOW),
        ))

        # First matching row, dropping the leading column.
        # NOTE(review): this assumes planet_id is the first CSV column - confirm.
        filtered_data = self.labels[self.labels["planet_id"] == planet].iloc[0, 1:].values

        return torch.tensor(input_vector), torch.tensor(filtered_data), planet

    def __len__(self):
        """Number of AIRS files selected by the ``[start:end]`` slice."""
        return len(self.airs_list)


In [None]:
# Prefer CUDA when it is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Network wrapped in DataParallel and moved to the selected device.
model = nn.DataParallel(WavelengthPredictor()).to(device)

# Loss, optimiser and plateau scheduler (the scheduler is stepped with the
# validation loss in the training cell).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-2,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.5,
    patience=3,
    min_lr=1e-8,
)

# Set `weights` to a checkpoint path to resume; leave it as None to start fresh.
# weights="/kaggle/input/arieldata/epoch140.pth"
weights = None

start_epoch = 0
if weights:
    # Restore model, optimiser and scheduler state, then continue after the saved epoch.
    checkpoint = torch.load(weights, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming from epoch {start_epoch}")
    print(f"Resuming learning rate: {optimizer.param_groups[0]['lr']}")

# Batch sizes: batchsize1 for training, batchsize2 for validation.
batchsize1 = 24
batchsize2 = 2

# Training split: first 612 sorted files, reshuffled every epoch.
train_data = ARIEL(
    "/kaggle/input/arieldata/airs2k",
    "/kaggle/input/arieldata/fgs2k",
    start=0,
    end=612,
)
train_dataloader = DataLoader(train_data, batch_size=batchsize1, shuffle=True)

# Validation split: the next 50 sorted files, kept in order.
val_data = ARIEL(
    "/kaggle/input/arieldata/airs2k",
    "/kaggle/input/arieldata/fgs2k",
    start=612,
    end=662,
)
val_dataloader = DataLoader(val_data, batch_size=batchsize2, shuffle=False)

In [None]:
# Number of mini-batches the training DataLoader yields per epoch.
print(len(train_dataloader))

In [None]:
# Free-text label for this run. The 24 inputs are the 12 AIRS features
# followed by the 12 FGS features produced by the ARIEL dataset, so the
# previous "only airs" wording no longer described the pipeline.
print("airs + fgs , features = 24")

In [None]:
# Total number of scalar entries across all parameter tensors of the model.
num_params = sum(map(torch.numel, model.parameters()))
print(f"Number of parameters: {num_params}")

In [None]:
import warnings
# Hide every FutureWarning raised from this point on (process-wide filter).
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Train for `epochs` epochs beyond `start_epoch`, validating after each one.
# A checkpoint is written whenever the validation loss improves, and training
# stops early after `patience` consecutive epochs without improvement.
epochs = 101
total = start_epoch + epochs

print("training started")

best_val_loss = float('inf')
patience = 15
no_improve = 0

# One-off manual learning-rate override applied after the training pass of
# `manual_lr_epoch`.
# NOTE(review): the original log message claimed 1e-4 while the code applied
# 1e-8 (which equals the scheduler's min_lr). The applied value is kept as-is
# and the message now reports it; confirm which value was actually intended.
manual_lr_epoch = 13
manual_lr = 1e-8

for epoch in range(start_epoch, total):
    model.train()
    train_loss = 0
    val_loss = 0

    for input_vector, label , planet in train_dataloader:
        optimizer.zero_grad()
        input_vector = input_vector.float().to(device)
        label = label.float().to(device)

        out = model(input_vector)
        loss = criterion(out, label)

        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch losses.
    train_loss /= len(train_dataloader)

    # Every 5th epoch, show the first three targets/predictions of the first
    # sample in the last training batch as a quick sanity check.
    if epoch%5==0 and epoch>0:
        print(f" label {(label[0][:3].cpu().detach().numpy())} , output {(out[0][:3].cpu().detach().numpy())}")

    if epoch == manual_lr_epoch:
        for param_group in optimizer.param_groups:
            param_group['lr'] = manual_lr
        # Report once, and report the value that was actually applied.
        print(f"Learning rate manually set to {manual_lr} at epoch {epoch}")

    model.eval()
    with torch.no_grad():
        for input_vector, label , planet in val_dataloader:
            input_vector = input_vector.float().to(device)
            label = label.float().to(device)

            out = model(input_vector)
            loss = criterion(out, label)
            val_loss += loss.item()

    val_loss /= len(val_dataloader)

    # Let the plateau scheduler react to the validation loss and log any change.
    prev = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    nex =optimizer.param_groups[0]['lr']

    if prev!=nex:
        print("LR decreased to " , nex)

    print(f"Epoch {epoch+1}/{total}, Train loss: {train_loss}, Val loss: {val_loss}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0

        # The checkpoint is selected on validation loss, but the file name
        # and the stored 'loss' entry record the training loss.
        model_filename = f"epoch{epoch}-loss{train_loss:.8f}.pth"
        model_path = os.path.join("/kaggle/working", model_filename)

        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'loss': train_loss,
        }
        torch.save(checkpoint, model_path)
        print(f"Model saved at epoch {epoch}")

    else:
        no_improve += 1
        if no_improve == patience:
            print("Early stopping triggered at epoch", epoch)
            break

In [None]:
# Show the learning rate of the optimizer's first parameter group after training.
print(f"Current learning rate: {optimizer.param_groups[0]['lr']}")
print("done")

In [None]:
# planet=1129361124
# airs_file=f"/kaggle/input/arieldata/airs2k/{planet}_airs.npy"
# # fgs_file=f"/kaggle/input/dataset-ariel/fgs/{planet}_fgs.npy"


# airs_arr = np.load(airs_file)
# # fgs_arr  = np.load(fgs_file) 

# airs_arr=(airs_arr-np.min(airs_arr))/(np.max(airs_arr)-np.min(airs_arr))
# # fgs_arr=(fgs_arr-np.min(fgs_arr))/(np.max(fgs_arr)-np.min(fgs_arr))
# initial_breakpoint=900
# buffer_size=80 
# smooth_window=200
# window_size=300
# airsbp = optimize_breakpoint(airs_arr,initial_breakpoint,window_size=window_size,buffer_size=buffer_size,smooth_window=smooth_window)
# # fgsbp = optimize_breakpoint(fgs_arr,initial_breakpoint,window_size=window_size,buffer_size=buffer_size,smooth_window=smooth_window)
# midpoint1 = len(airs_arr) // 2
# bp1 = [airsbp, 2 * midpoint1 - airsbp]
# airs_start   =  bp1[0] - buffer_size
# airs_end     =  bp1[0] + buffer_size
# airs_start1  =  bp1[1] - buffer_size
# airs_end1    =  bp1[1] + buffer_size
# # midpoint2 = len(fgs_arr) // 2
# # bp2 = [fgsbp, 2 * midpoint2 - fgsbp]
# # fgs_start  =    bp2[0] - buffer_size
# # fgs_end    =    bp2[0] + buffer_size
# # fgs_start1 =    bp2[1] - buffer_size
# # fgs_end1   =    bp2[1] + buffer_size
# airs_vector=  vector( airs_arr,  airs_start ,  airs_end , airs_start1 , airs_end1 )
# # fgs_vector =  vector( fgs_arr, fgs_start  ,   fgs_end , fgs_start1  , fgs_end1 )        


# # input_vector=  np.concatenate((airs_vector , fgs_vector))
# # input_vector=airs_vector

# labels        = pd.read_csv("/kaggle/input/ariel-data-challenge-2024/train_labels.csv")
# filtered_data = labels[labels["planet_id"] == planet].iloc[0, 1:].values
# in_vector = torch.tensor(np.array(airs_vector))
# in_vector=in_vector.unsqueeze(0).float()
# output       = torch.tensor(filtered_data )
# print(in_vector.shape , output.shape)




In [None]:
# print(output[:5])

In [None]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model = WavelengthPredictor()
# model = nn.DataParallel(model)
# model = model.to(device)

# weights="/kaggle/input/arieldata/ariel3_335.pth"

# checkpoint = torch.load(weights, map_location=device)
# model.load_state_dict(checkpoint['model_state_dict'])


In [None]:
# model.eval()
# with torch.no_grad():
#     pred = model(in_vector)

# # Print the output shape
# print(pred.shape)

In [None]:
# print(pred[0][:5])