In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import train_test_split
from scipy.signal import savgol_filter

In [2]:
class CNN2D_AIRS(nn.Module):
    """Two-stage convolutional feature extractor used for the AIRS frames.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool with stride 2, so the two stages together shrink both spatial
    dimensions by a factor of four.

    Args:
        in_channels: Number of channels of the input images.
        out_channels: Number of feature maps produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        # bn1 / bn2 are registered but not applied in forward(); they still
        # contribute entries to state_dict().
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Apply conv -> ReLU -> pool twice and return the feature maps."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        return x

class CNN2D_FGS(nn.Module):
    """Two-stage convolutional feature extractor used for the FGS frames.

    The network is conv(3x3, padding 1) -> ReLU -> max-pool(2, stride 2),
    applied twice. The batch-norm layers are created but not used by
    ``forward``.

    Args:
        in_channels: Number of channels of the input images.
        out_channels: Number of feature maps produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, padding=1
        )
        self.bn1 = nn.BatchNorm2d(out_channels)  # unused in forward(), kept in state_dict
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3, padding=1
        )
        self.bn2 = nn.BatchNorm2d(out_channels)  # unused in forward(), kept in state_dict
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Return the pooled feature maps after both convolution stages."""
        first_stage = self.pool(F.relu(self.conv1(x)))
        second_stage = self.pool(F.relu(self.conv2(first_stage)))
        return second_stage

class Model(nn.Module):
    """Regress 283 values from AIRS/FGS frame sequences and their light curves.

    Four branches are concatenated and fed to a dense head:

    * AIRS frames  -> ``CNN2D_AIRS`` per frame -> LSTM -> last hidden state (128)
    * FGS frames   -> ``CNN2D_FGS`` per frame  -> LSTM -> last hidden state (128)
    * AIRS light curve -> Linear + ReLU (64)
    * FGS light curve  -> Linear + ReLU (64)

    The LSTM input sizes (``16 * 8 * 89`` and ``16 * 8 * 8``) correspond to
    16-channel feature maps of 32x356 and 32x32 frames after two 2x2 pools.

    Args:
        airs_frames: Length of the AIRS light-curve vector given to ``forward``.
        fgs_frames: Length of the FGS light-curve vector given to ``forward``.
    """

    def __init__(self, airs_frames, fgs_frames):
        super(Model, self).__init__()
        self.cnn_airs = CNN2D_AIRS(1, 16)
        self.cnn_fgs = CNN2D_FGS(1, 16)

        self.lstm_airs = nn.LSTM(16 * 8 * 89, 128, batch_first=True)
        self.lstm_fgs = nn.LSTM(16 * 8 * 8, 128, batch_first=True)

        # Not applied in forward(); kept registered so state_dict keys (and
        # therefore existing checkpoints) stay compatible.
        self.bn_airs = nn.BatchNorm1d(128)
        self.bn_fgs = nn.BatchNorm1d(128)

        self.fc_light_curve_airs = nn.Sequential(
            nn.Linear(airs_frames, 64),
            nn.ReLU()
        )

        self.fc_light_curve_fgs = nn.Sequential(
            nn.Linear(fgs_frames, 64),
            nn.ReLU()
        )

        self.fc_combined = nn.Sequential(
            nn.Linear(128 + 128 + 64 + 64, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 283)
        )

    def forward(self, airs_ch0, fgs1, light_curve_airs, light_curve_fgs):
        """Run all four branches and return a ``(batch, 283)`` tensor.

        Args:
            airs_ch0: AIRS frames; the first two dimensions are (batch, steps)
                and the remaining elements are reshaped to ``(1, 32, 356)``
                images.
            fgs1: FGS frames; the first two dimensions are (batch, steps) and
                the remaining elements are reshaped to ``(1, 32, 32)`` images.
                Its step count may differ from that of ``airs_ch0``.
            light_curve_airs: ``(batch, airs_frames)`` AIRS light curve.
            light_curve_fgs: ``(batch, fgs_frames)`` FGS light curve.
        """
        batch_size, airs_steps = airs_ch0.shape[:2]
        # Each sequence is split with its own step count; the previous code
        # reused the AIRS count for FGS, which broke when the two differed.
        fgs_steps = fgs1.shape[1]

        # Fold (batch, steps) into one axis so the 2-D CNN sees single images.
        # reshape() also accepts non-contiguous inputs, unlike view().
        airs_features = self.cnn_airs(airs_ch0.reshape(-1, 1, 32, 356))
        airs_features = airs_features.reshape(batch_size, airs_steps, -1)
        _, (airs_hidden, _) = self.lstm_airs(airs_features)
        # h_n is (num_layers=1, batch, 128); drop the layer axis.
        airs_hidden = airs_hidden.squeeze(0)

        fgs_features = self.cnn_fgs(fgs1.reshape(-1, 1, 32, 32))
        fgs_features = fgs_features.reshape(batch_size, fgs_steps, -1)
        _, (fgs_hidden, _) = self.lstm_fgs(fgs_features)
        fgs_hidden = fgs_hidden.squeeze(0)

        light_curve_airs_features = self.fc_light_curve_airs(light_curve_airs)
        light_curve_fgs_features = self.fc_light_curve_fgs(light_curve_fgs)

        combined_features = torch.cat(
            (airs_hidden, fgs_hidden, light_curve_airs_features, light_curve_fgs_features),
            dim=1,
        )

        output = self.fc_combined(combined_features)
        return output

# Lengths of the two light-curve vectors consumed by the model's dense branches.
airs_frames = fgs_frames = 1125

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

model = Model(airs_frames, fgs_frames)
model = model.to(device)

In [3]:
# Total number of scalar parameters held by the model.
c = sum(param.numel() for param in model.parameters())
print(c)

6877947


In [14]:
def planetnumber(filename):
    """Return the integer that precedes the first underscore in ``filename``.

    The whole string is parsed when it contains no underscore. ``int`` raises
    ``ValueError`` if that prefix is not a valid integer literal.
    """
    prefix, _, _ = filename.partition('_')
    return int(prefix)

class ARIEL(Dataset):
    """Per-planet dataset of AIRS/FGS frame stacks, light curves and labels.

    AIRS files are spread over four directories; their names are pooled,
    sorted by the planet number in the file name and sliced with
    ``[start:end]``. The FGS file of a planet is looked up as
    ``{fgs_dir}/{planet_num}_fgs.npy``.

    Args:
        airs_dir1, airs_dir2, airs_dir3, airs_dir4: Directories holding AIRS
            ``.npy`` files. If a name occurs in several directories, the
            earliest directory wins.
        fgs_dir: Directory holding the FGS ``.npy`` files.
        start, end: Slice bounds applied to the sorted file lists.
        labels_path: CSV with a ``planet_id`` column followed by the target
            columns. Defaults to the competition's training labels.
    """

    def __init__(self, airs_dir1, airs_dir2, airs_dir3, airs_dir4, fgs_dir , start , end,
                 labels_path="/kaggle/input/ariel-data-challenge-2024/train_labels.csv"):
        self.airs_dir1 = airs_dir1
        self.airs_dir2 = airs_dir2
        self.airs_dir3 = airs_dir3
        self.airs_dir4 = airs_dir4

        # List each directory exactly once and remember where every file
        # lives, so __getitem__ does not re-scan the directories per sample.
        self.airs_full = []
        self._airs_dir_of = {}
        for directory in (airs_dir1, airs_dir2, airs_dir3, airs_dir4):
            names = os.listdir(directory)
            self.airs_full += names
            for name in names:
                # setdefault keeps the first directory, matching the
                # dir1 -> dir2 -> dir3 -> dir4 lookup priority.
                self._airs_dir_of.setdefault(name, directory)

        self.fgs_dir = fgs_dir

        self.airs_list = sorted(self.airs_full, key=planetnumber)[start:end]
        self.fgs_list = sorted(os.listdir(self.fgs_dir), key=planetnumber)[start:end]

        self.labels = pd.read_csv(labels_path)

    def _airs_path(self, planet):
        """Return the full path of AIRS file ``planet`` from the prebuilt index."""
        return os.path.join(self._airs_dir_of[planet], planet)

    def __getitem__(self, index):
        """Return ``(file_name, sample_dict)`` for the ``index``-th planet.

        ``sample_dict`` holds:
            * ``'airs_frames'``: float tensor ``(1125, 1, 32, 356)``
            * ``'fgs_frames'``: float tensor ``(1125, 1, 32, 32)``
            * ``'airs_1d'`` / ``'fgs_1d'``: per-frame pixel sums, min-max
              scaled to ``[0, 1]``, float tensors of length 1125
            * ``'label'``: float tensor with every label column after the
              first for this planet
        """
        planet = self.airs_list[index]
        airs_file = self._airs_path(planet)

        planet_num = planetnumber(planet)
        fgs_file = f"{self.fgs_dir}/{planet_num}_fgs.npy"

        airs_arr_frames = np.load(airs_file)
        fgs_arr_frames = np.load(fgs_file)

        airs_arr_frames = airs_arr_frames.reshape(1125, 32, 356)
        fgs_arr_frames = fgs_arr_frames.reshape(1125, 32, 32)

        # Collapse each frame to its total flux to obtain a 1-D light curve.
        airs_1d = np.sum(airs_arr_frames, axis=(1, 2))
        fgs_1d = np.sum(fgs_arr_frames, axis=(1, 2))

        # Min-max scaling. NOTE: a perfectly constant light curve makes the
        # denominator zero and yields NaNs.
        airs_1d = (airs_1d-np.min(airs_1d))/(np.max(airs_1d)-np.min(airs_1d))
        fgs_1d  = (fgs_1d-np.min(fgs_1d))/(np.max(fgs_1d)-np.min(fgs_1d))

        airs_arr_frames = torch.from_numpy(airs_arr_frames).float().unsqueeze(1)  # Add channel dimension
        fgs_arr_frames = torch.from_numpy(fgs_arr_frames).float().unsqueeze(1)  # Add channel dimension

        airs_1d = torch.from_numpy(airs_1d).float()
        fgs_1d = torch.from_numpy(fgs_1d).float()

        # First row whose planet_id matches; column 0 (the id) is dropped.
        filtered_data = self.labels[self.labels["planet_id"] == planet_num].iloc[0, 1:].values
        output = torch.tensor(filtered_data).float()

        return planet ,   {
            'airs_frames': airs_arr_frames,
            'fgs_frames': fgs_arr_frames,
            'airs_1d': airs_1d,
            'fgs_1d': fgs_1d,
            'label': output
        }

    def __len__(self):
        """Number of AIRS files selected by the ``[start:end]`` slice."""
        return len(self.airs_list)

In [35]:
# Light-curve lengths expected by the model's dense branches.
airs_frames = 1125
fgs_frames = 1125

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# The model is wrapped in DataParallel before the checkpoint is loaded into it.
model = nn.DataParallel(Model(airs_frames, fgs_frames).to(device)).to(device)

criterion = nn.MSELoss()

# Checkpoint to evaluate (an alternative, weights_o_g_t_1.pth, lives in the
# same input dataset).
weights = "/kaggle/input/tempnigga/epoch25.pth"

if not weights:
    start_epoch = 0
else:
    checkpoint = torch.load(weights, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])

cuda


In [92]:
# The AIRS arrays are split across four input folders; FGS arrays share one.
part1, part2, part3, part4 = (
    "/kaggle/input/arieldata/airs-p1",
    "/kaggle/input/arieldata/airs-p2",
    "/kaggle/input/arieldata/airs-p3",
    "/kaggle/input/arieldata/airs-p4",
)
part5 = "/kaggle/input/arieldata/fgs-p"

# Batch size of one: the evaluation loop below indexes out[0] / label[0].
b = 1
data = ARIEL(part1, part2, part3, part4, part5, start=0, end=673)
dataloader = DataLoader(dataset=data, batch_size=b, num_workers=8, shuffle=False)

print(len(dataloader))

673




In [93]:
def smooth_signal(signal):
    """Return a LOWESS-smoothed version of ``signal`` over its integer indices.

    NOTE(review): ``lowess`` is not imported in the visible notebook (its only
    import sits in a commented-out cell), so calling this raises ``NameError``
    unless the name is provided elsewhere — confirm before use.
    """
    positions = np.arange(len(signal))
    # Value passed to lowess as ``delta``: one tenth of the signal's std.
    delta = 0.1 * np.std(signal)
    return lowess(
        signal,
        positions,
        frac=0.2,
        it=4,
        delta=delta,
        return_sorted=False,
    )


In [None]:
# Evaluate the model one planet at a time (the code indexes out[0]/label[0],
# i.e. it relies on batch size 1). For every planet it records:
#   l_loss   - MSE between the Savitzky-Golay-smoothed prediction and the label
#   scales   - element-wise prediction / label ratios
#   outputs  - the smoothed prediction itself
# and sorts the planet into an "over-predicted" or "under-predicted" bucket.
model.eval()
c1 = c2 = c3 = 0

final_up, final_down = [], []
scales, outputs, l_loss = [], [], []

i = 0
with torch.no_grad():
    for planet, batch in dataloader:
        airs_frames, fgs_frames, airs_1d, fgs_1d, label = (
            batch[key].to(device)
            for key in ('airs_frames', 'fgs_frames', 'airs_1d', 'fgs_1d', 'label')
        )

        raw = model(airs_frames, fgs_frames, airs_1d, fgs_1d)

        # Smooth the predicted spectrum (window 50, polynomial order 1) and
        # put it back into a (1, n) tensor on the compute device.
        smoothed = savgol_filter(raw[0].detach().cpu().numpy(), 50, 1)
        out = torch.tensor(smoothed).unsqueeze(0).to(device)
        loss = criterion(out, label)
        l_loss.append(loss.item())

        pred = out[0].cpu().numpy()
        truth = label[0].cpu().numpy()

        # Ratio prediction / truth for every output value.
        res = list(pred / truth)
        scales.append(res)
        outputs.append(pred)

        # Ratios (always oriented larger / smaller) split by which side is larger.
        over = pred > truth
        under = truth > pred
        up = list(pred[over] / truth[over])
        down = list(truth[under] / pred[under])

        n_up, n_down = len(up), len(down)
        if n_up == 283 or n_up > n_down or (n_down - n_up) < 30:
            # c1 counts these planets from index 600 onwards, c2 the earlier ones.
            if i < 600:
                c2 += 1
            else:
                c1 += 1
            final_up.append(up)
        elif n_down == 283 or n_down > 200:
            c3 += 1
            final_down.append(down)
        i += 1


print(f"c1: {c1}, c2: {c2} , c3:{c3}")
print(len(l_loss))

In [None]:
# Worst, best and average per-planet loss, printed in that order.
for reduce_fn in (np.max, np.min, np.mean):
    print(reduce_fn(l_loss))

# import matplotlib.pyplot as plt
# x = savgol_filter(out[0].detach().cpu().numpy(), 20, 1)
# plt.plot(x)
# plt.show()
# plt.plot(label[0].detach().cpu().numpy())

In [None]:
1.1925089264701472e-05
6.160554907171355e-10
1.0524579362235455e-06


In [99]:
# Count how many prediction/label ratios are negative (c) out of all ratios (c2).
ar = scales
flat_ratios = [y for x in ar for y in x]
c = sum(1 for y in flat_ratios if y < 0)
c2 = len(flat_ratios)

print(c)
print(c2)
print(len(scales))


15
190459
673


In [39]:
# Percentage of negative ratios: 15 out of 190459.
100 * 15 / 190459

0.007875710782898157

In [None]:
# Stack the per-planet lists into arrays, report their shapes and save them.
scales, output = np.array(scales), np.array(outputs)
for stacked in (scales, output):
    print(stacked.shape)

for file_name, stacked in (("scales1.npy", scales), ("output1.npy", output)):
    np.save(file_name, stacked)

In [None]:
# First five values of the first planet: predictions, then ratios.
for stacked in (output, scales):
    print(stacked[0][:5])

In [None]:
# Reload previously saved predictions (ar) and ratios (ar1). The same input
# dataset also holds the output_t_g_o_1.npy / scales_t_g_o_1.npy pair.
ar, ar1 = (
    np.load("/kaggle/input/tempnigga/output_o_g_t_1.npy"),
    np.load("/kaggle/input/tempnigga/scales_o_g_t_1.npy"),
)

for loaded in (ar, ar1):
    print(loaded[0][:5])

In [74]:
# import numpy as np
# import pandas as pd
# from scipy.signal import savgol_filter
# from statsmodels.nonparametric.smoothers_lowess import lowess
# import matplotlib.pyplot as plt
# from sklearn.metrics import mean_squared_error, r2_score
# import time

# def smooth_signal(signal, method='lowess', **kwargs):
#     """
#     Smooth a signal using either LOWESS or Savitzky-Golay filter.
    
#     Parameters:
#     -----------
#     signal : array-like
#         Input signal (out[0])
#     method : str
#         'lowess' or 'savgol'
#     kwargs : dict
#         For LOWESS: frac (float) - window size as fraction of total points
#         For SavGol: window (int) - window size (must be odd)
#                     poly (int) - polynomial order
#     """
#     x = np.arange(len(signal))
    
#     if method == 'lowess':
#         frac = kwargs.get('frac', 0.15)
#         smoothed = lowess(
#             signal, 
#             x,
#             frac=frac,
#             it=3,
#             delta=0.1 * np.std(signal),
#             return_sorted=False
#         )
#     else:  # savgol
#         window = kwargs.get('window', 21)
#         poly = kwargs.get('poly', 3)
#         smoothed = savgol_filter(signal, window, poly)
    
#     return smoothed

# def evaluate_smoothing(original_signal, ground_truth, method_name):
#     """Evaluate smoothing quality against ground truth."""
#     mse = mean_squared_error(ground_truth, original_signal)
#     r2 = r2_score(ground_truth, original_signal)
#     return {
#         'method': method_name,
#         'mse': mse,
#         'r2_score': r2
#     }

# def compare_methods(signal, ground_truth):
#     """Compare different smoothing methods using ground truth."""
#     results = []
    
#     # Test LOWESS with different fractions
#     for frac in [0.1, 0.15, 0.2 , 0.3 , 0.4]:
#         start_time = time.time()
#         smoothed = smooth_signal(signal, method='lowess', frac=frac)
#         proc_time = time.time() - start_time
        
#         metrics = evaluate_smoothing(smoothed, ground_truth, f'LOWESS (frac={frac})')
#         metrics['processing_time'] = proc_time
#         metrics['smoothed'] = smoothed
#         results.append(metrics)
    
#     # Test Savitzky-Golay with different windows
#     for window in [11, 21, 31]:
#         start_time = time.time()
#         smoothed = smooth_signal(signal, method='savgol', 
#                                window=window, poly=3)
#         proc_time = time.time() - start_time
        
#         metrics = evaluate_smoothing(smoothed, ground_truth, f'SavGol (window={window})')
#         metrics['processing_time'] = proc_time
#         metrics['smoothed'] = smoothed
#         results.append(metrics)
    
#     return pd.DataFrame(results)

# def plot_comparison(signal, ground_truth, smoothed_signals, title="Signal Smoothing Comparison"):
#     """Plot original signal, ground truth, and smoothed versions."""
#     plt.figure(figsize=(12, 6))
#     x = np.arange(len(signal))
    
#     # Plot ground truth
#     plt.plot(x, ground_truth, 'g-', label='Ground Truth (label[0])', alpha=0.8)
    
#     # Plot original signal
#     plt.plot(x, signal, 'k.', label='Original (out[0])', alpha=0.3, markersize=2)
    
#     # Plot smoothed signals
#     for method, smoothed in smoothed_signals.items():
#         plt.plot(x, smoothed, '-', label=method, alpha=0.7)
    
#     plt.grid(True, alpha=0.3)
#     plt.legend()
#     plt.title(title)
#     plt.xlabel('Sample Index')
#     plt.ylabel('Value')
#     plt.tight_layout()

# # Main execution
# def main(out_0, label_0):
#     """
#     Main function to process and compare smoothing methods.
    
#     Parameters:
#     -----------
#     out_0 : array-like
#         The signal to be smoothed
#     label_0 : array-like
#         The ground truth signal
#     """
#     # Compare methods
#     results = compare_methods(out_0, label_0)
    
#     # Print results sorted by MSE
#     print("\nMethod Comparison (sorted by MSE):")
#     print(results[['method', 'mse', 'r2_score', 'processing_time']]
#           .sort_values('mse'))
    
#     # Get best method based on MSE
#     best_lowess_idx = results[results['method'].str.contains('LOWESS')]['mse'].idxmin()
#     best_savgol_idx = results[results['method'].str.contains('SavGol')]['mse'].idxmin()
    
#     # Plot best methods
#     smoothed_signals = {
#         'Best LOWESS': results.loc[best_lowess_idx, 'smoothed'],
#         'Best SavGol': results.loc[best_savgol_idx, 'smoothed']
#     }
    
#     plot_comparison(out_0, label_0, smoothed_signals)
#     plt.show()
    
#     # Return best smoothed signal based on MSE
#     best_method_idx = results['mse'].idxmin()
#     return results.loc[best_method_idx, 'smoothed']

# out_0 = out[0].detach().cpu().numpy()  # Your signal to be smoothed
# label_0 = label[0].detach().cpu().numpy()  # Your ground truth signal
# best_smoothed = main(out_0, label_0)

In [68]:
# Shape of the label tensor left over from the last evaluated batch.
print(label.size())

torch.Size([1, 283])
