In [1]:
import numpy as np
import pandas as pd
import os
import re
import copy
import pickle
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold, KFold
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import polars as pl
import polars.selectors as cs
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator, FormatStrFormatter, PercentFormatter
import seaborn as sns

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim

from colorama import Fore, Style
from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline
import shap
import plotly.express as px

# Silence every warning raised by the imported libraries for the rest of the notebook.
warnings.filterwarnings('ignore')
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None
# Random seed handed to the cross-validation splitters defined below.
SEED = 42
# Number of cross-validation folds used by the TrainML helpers.
n_splits = 5

In [2]:
import random
import torch
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning can pick a different kernel on each run, which defeats
    # the deterministic flag above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False
# NOTE(review): this seeds with 100 while the splitters use SEED (42) — confirm this is intended.
seed_everything(100)

# Define functions

## Feature engineering for Sub 1, 2, 3, 4, 5

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression
def feature_engineering_v2(df, selector=None, imputer=None, fit=True):
    """Impute the numeric non-PCIAT columns and keep the 30 best-scoring ones.

    Args:
        df: Input frame. Duplicate column names are dropped (first one wins).
            With ``fit=True`` it must contain the target column ``'sii'``.
        selector: Fitted ``SelectKBest``; required when ``fit=False``.
        imputer: Fitted ``KNNImputer``; required when ``fit=False``.
        fit: When True, fit a new imputer and selector on ``df``; when False,
            apply the ones passed in.

    Returns:
        ``(df_selected, selector, imputer)`` where ``df_selected`` holds the
        selected, imputed features under a fresh ``RangeIndex``.

    Raises:
        ValueError: If ``fit=False`` and ``selector`` or ``imputer`` is missing.
    """
    if not fit and (selector is None or imputer is None):
        raise ValueError(
            "feature_engineering_v2(fit=False) requires a fitted selector and imputer"
        )

    df = df.loc[:, ~df.columns.duplicated()]
    if fit:
        y = df['sii']

    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
    pciat_cols = {col for col in df.columns if 'PCIAT' in col and 'Season' not in col}
    remaining_numeric_cols = [
        col for col in numeric_cols if col not in pciat_cols and col != 'sii'
    ]

    # Infinite values become NaN so the imputer treats them as missing.
    # The replacement is a no-op when there are none, so no guard is needed.
    X = df[remaining_numeric_cols].replace([np.inf, -np.inf], np.nan)

    if fit:
        imputer = KNNImputer()
        X = pd.DataFrame(imputer.fit_transform(X), columns=remaining_numeric_cols)

        selector = SelectKBest(score_func=f_regression, k=30)
        X_new = selector.fit_transform(X, y)
        selected_features = X.columns[selector.get_support()]
    else:
        # imputer.transform returns a bare array, so the selected names are
        # recovered from the column list instead of from a frame.
        X_new = selector.transform(imputer.transform(X))
        selected_features = [
            col
            for col, selected in zip(remaining_numeric_cols, selector.get_support())
            if selected
        ]

    df_selected = pd.DataFrame(X_new, columns=selected_features)
    return df_selected, selector, imputer

def feature_engineering(df):
    """Drop the ``*Season*`` columns and append 16 product/ratio features.

    The input frame is left untouched; a new frame is returned with the
    derived columns appended in the order listed below.
    """
    out = df.drop([name for name in df.columns if 'Season' in name], axis=1)

    # (new column, left operand, operator, right operand)
    derived = (
        ('BMI_Age', 'Physical-BMI', '*', 'Basic_Demos-Age'),
        ('Internet_Hours_Age', 'PreInt_EduHx-computerinternet_hoursday', '*', 'Basic_Demos-Age'),
        ('BMI_Internet_Hours', 'Physical-BMI', '*', 'PreInt_EduHx-computerinternet_hoursday'),
        ('BFP_BMI', 'BIA-BIA_Fat', '/', 'BIA-BIA_BMI'),
        ('FFMI_BFP', 'BIA-BIA_FFMI', '/', 'BIA-BIA_Fat'),
        ('FMI_BFP', 'BIA-BIA_FMI', '/', 'BIA-BIA_Fat'),
        ('LST_TBW', 'BIA-BIA_LST', '/', 'BIA-BIA_TBW'),
        ('BFP_BMR', 'BIA-BIA_Fat', '*', 'BIA-BIA_BMR'),
        ('BFP_DEE', 'BIA-BIA_Fat', '*', 'BIA-BIA_DEE'),
        ('BMR_Weight', 'BIA-BIA_BMR', '/', 'Physical-Weight'),
        ('DEE_Weight', 'BIA-BIA_DEE', '/', 'Physical-Weight'),
        ('SMM_Height', 'BIA-BIA_SMM', '/', 'Physical-Height'),
        ('Muscle_to_Fat', 'BIA-BIA_SMM', '/', 'BIA-BIA_FMI'),
        ('Hydration_Status', 'BIA-BIA_TBW', '/', 'Physical-Weight'),
        ('ICW_TBW', 'BIA-BIA_ICW', '/', 'BIA-BIA_TBW'),
        ('BMI_PHR', 'Physical-BMI', '*', 'Physical-HeartRate'),
    )
    for new_col, left, op, right in derived:
        if op == '*':
            out[new_col] = out[left] * out[right]
        else:
            out[new_col] = out[left] / out[right]

    return out

## AutoEncoder for Sub 1,2,3,4,5

In [4]:
class AutoEncoder(nn.Module):
    """Symmetric fully connected autoencoder.

    The encoder narrows ``input_dim -> 3*encoding_dim -> 2*encoding_dim ->
    encoding_dim`` with a ReLU after every layer; the decoder mirrors it and
    ends in a sigmoid, so reconstructions lie in [0, 1].
    """

    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        wide = encoding_dim * 3
        mid = encoding_dim * 2

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, wide), nn.ReLU(),
            nn.Linear(wide, mid), nn.ReLU(),
            nn.Linear(mid, encoding_dim), nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, mid), nn.ReLU(),
            nn.Linear(mid, wide), nn.ReLU(),
            nn.Linear(wide, input_dim), nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))


def perform_autoencoder(df, encoding_dim=50, epochs=50, batch_size=32):
    """Standard-scale ``df`` and train an ``AutoEncoder`` to reconstruct it.

    Args:
        df: Numeric feature frame (or array) to learn a compressed encoding of.
        encoding_dim: Width of the bottleneck layer.
        epochs: Number of full passes over the data.
        batch_size: Rows per optimisation step; batches are taken in row
            order without shuffling.

    Returns:
        ``(autoencoder, scaler)``: the trained model and the fitted
        ``StandardScaler``, which ``encode_data`` needs for new data.
    """
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)

    data_tensor = torch.FloatTensor(df_scaled)

    input_dim = data_tensor.shape[1]
    autoencoder = AutoEncoder(input_dim, encoding_dim)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters())

    for epoch in range(epochs):
        for i in range(0, len(data_tensor), batch_size):
            batch = data_tensor[i : i + batch_size]
            optimizer.zero_grad()
            reconstructed = autoencoder(batch)
            loss = criterion(reconstructed, batch)
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 50 == 0:
            # The reported loss is that of the last batch of the epoch.
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')
    return autoencoder, scaler

def encode_data(autoencoder, scaler, df):
    """Scale ``df`` with ``scaler`` and run it through the trained encoder.

    Returns a DataFrame with one ``Enc_<k>`` column (1-based) per encoder
    output unit.
    """
    inputs = torch.FloatTensor(scaler.transform(df))

    with torch.no_grad():
        latent = autoencoder.encoder(inputs).numpy()

    column_names = [f'Enc_{i + 1}' for i in range(latent.shape[1])]
    return pd.DataFrame(latent, columns=column_names)

## TrainML for Sub 2,3,4,5

In [5]:
def process_file(filename, dirname):
    """Summarise one time-series parquet file.

    Reads ``<dirname>/<filename>/part-0.parquet``, drops the ``step`` column
    and returns the flattened ``describe()`` table together with the text
    after the ``=`` in ``filename``.
    """
    parquet_path = os.path.join(dirname, filename, 'part-0.parquet')
    series = pd.read_parquet(parquet_path).drop('step', axis=1)
    flat_stats = series.describe().values.reshape(-1)
    return flat_stats, filename.split('=')[1]

def load_time_series(dirname) -> pd.DataFrame:
    """Summarise every entry of ``dirname`` in parallel with ``process_file``.

    Returns a frame with one row per directory entry: columns ``stat_0`` ...
    ``stat_{n-1}`` hold the flattened statistics and ``id`` the identifier
    returned by ``process_file``.
    """
    entries = os.listdir(dirname)

    def _summarise(entry):
        return process_file(entry, dirname)

    with ThreadPoolExecutor() as pool:
        results = list(tqdm(pool.map(_summarise, entries), total=len(entries)))

    stats, indexes = zip(*results)
    stat_columns = [f"stat_{i}" for i in range(len(stats[0]))]
    frame = pd.DataFrame(stats, columns=stat_columns)
    frame['id'] = indexes
    return frame

def update(df):
    """Fill gaps in the categorical columns and cast them to ``category``.

    The column names come from the module-level ``cat_c``. ``df`` is modified
    in place and also returned.
    """
    for column in cat_c:
        df[column] = df[column].fillna('Missing').astype('category')
    return df

def create_mapping(column, dataset):
    """Map each distinct value of ``dataset[column]`` to its first-seen position."""
    distinct = dataset[column].unique()
    return dict(zip(distinct, range(len(distinct))))

def quadratic_weighted_kappa(y_true, y_pred):
    """Return Cohen's kappa between the two label sequences with quadratic weights."""
    score = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    return score

def threshold_Rounder(oof_non_rounded, thresholds):
    """Bucket continuous predictions into the classes 0-3.

    A value gets class 0 if it is below ``thresholds[0]``, otherwise 1 if it
    is below ``thresholds[1]``, otherwise 2 if it is below ``thresholds[2]``,
    otherwise 3. The cut points are tested in that order, so they need not
    be sorted.
    """
    low_cut, mid_cut, high_cut = thresholds[0], thresholds[1], thresholds[2]
    # Build the decision from the innermost choice outwards.
    two_or_three = np.where(oof_non_rounded < high_cut, 2, 3)
    one_or_more = np.where(oof_non_rounded < mid_cut, 1, two_or_three)
    return np.where(oof_non_rounded < low_cut, 0, one_or_more)

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    """Objective for threshold tuning: negative QWK of the bucketed predictions.

    ``thresholds`` comes first so the function can be handed straight to
    ``scipy.optimize.minimize``.
    """
    bucketed = threshold_Rounder(oof_non_rounded, thresholds)
    kappa = quadratic_weighted_kappa(y_true, bucketed)
    return -kappa
    
def TrainML(model_class, X, y, test_data):
    """Cross-validate a regressor, tune rounding thresholds, build a submission.

    For each stratified fold a clone of ``model_class`` is fitted, out-of-fold
    and test predictions are collected, and the QWK of the rounded
    predictions is reported. The thresholds are then tuned on the out-of-fold
    predictions with Nelder-Mead and applied to the fold-averaged test
    predictions.

    Reads the module-level ``n_splits``, ``SEED`` and ``sample`` (whose ``id``
    column supplies the submission ids).

    Returns:
        ``(submission, oof_tuned, oof_non_rounded, y, optimized_thresholds)``.
    """
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

    fold_train_scores, fold_val_scores = [], []

    oof_non_rounded = np.zeros(len(y), dtype=float)
    oof_rounded = np.zeros(len(y), dtype=int)
    test_preds = np.zeros((len(test_data), n_splits))

    fold_iter = tqdm(splitter.split(X, y), desc="Training Folds", total=n_splits)
    for fold, (train_idx, val_idx) in enumerate(fold_iter):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        train_pred = model.predict(X_train)
        val_pred = model.predict(X_val)
        val_pred_int = val_pred.round(0).astype(int)

        oof_non_rounded[val_idx] = val_pred
        oof_rounded[val_idx] = val_pred_int

        train_kappa = quadratic_weighted_kappa(y_train, train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, val_pred_int)
        fold_train_scores.append(train_kappa)
        fold_val_scores.append(val_kappa)

        test_preds[:, fold] = model.predict(test_data)

        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(fold_train_scores):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(fold_val_scores):.4f}")

    # Start from the midpoints between the four classes.
    search = minimize(
        evaluate_predictions,
        x0=[0.5, 1.5, 2.5],
        args=(y, oof_non_rounded),
        method='Nelder-Mead',
    )
    assert search.success, "Optimization did not converge."
    optimized_thresholds = search.x
    print('OPTIMIZED THRESHOLDS', optimized_thresholds)

    oof_tuned = threshold_Rounder(oof_non_rounded, optimized_thresholds)
    tuned_kappa = quadratic_weighted_kappa(y, oof_tuned)
    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tuned_kappa:.3f}{Style.RESET_ALL}")

    mean_test_pred = test_preds.mean(axis=1)
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': threshold_Rounder(mean_test_pred, optimized_thresholds),
    })
    return submission, oof_tuned, oof_non_rounded, y, optimized_thresholds

In [6]:
def TrainML_Sub1(model_class, X, y, test_data):
    """Variant of ``TrainML`` that re-fits feature selection inside every fold.

    Within each stratified fold ``feature_engineering_v2`` is fitted on the
    training part and then applied to the validation part and to
    ``test_data``, so the imputer and selector never see held-out rows. The
    thresholds are tuned as in ``TrainML``.

    Reads the module-level ``n_splits``, ``SEED`` and ``sample`` (whose ``id``
    column supplies the submission ids).

    Returns:
        ``(submission, tKappa, oof_tuned, oof_non_rounded, y,
        optimized_thresholds)`` where ``tKappa`` is the QWK of the tuned
        out-of-fold predictions.
    """
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

    fold_train_scores, fold_val_scores = [], []

    oof_non_rounded = np.zeros(len(y), dtype=float)
    oof_rounded = np.zeros(len(y), dtype=int)
    test_preds = np.zeros((len(test_data), n_splits))

    fold_iter = tqdm(splitter.split(X, y), desc="Training Folds", total=n_splits)
    for fold, (train_idx, val_idx) in enumerate(fold_iter):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        # Fit the imputer/selector on this fold's training rows only.
        X_train, selector_tr, imputer_tr = feature_engineering_v2(X_train, fit=True)
        X_val, _, _ = feature_engineering_v2(X_val, selector_tr, imputer_tr, fit=False)

        model = clone(model_class)
        model.fit(X_train, y_train)

        train_pred = model.predict(X_train)
        val_pred = model.predict(X_val)
        val_pred_int = val_pred.round(0).astype(int)

        oof_non_rounded[val_idx] = val_pred
        oof_rounded[val_idx] = val_pred_int

        train_kappa = quadratic_weighted_kappa(y_train, train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, val_pred_int)
        fold_train_scores.append(train_kappa)
        fold_val_scores.append(val_kappa)

        # The test set goes through this fold's fitted transformers as well.
        test_data_fe, _, _ = feature_engineering_v2(test_data, selector_tr, imputer_tr, fit=False)
        test_preds[:, fold] = model.predict(test_data_fe)

        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(fold_train_scores):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(fold_val_scores):.4f}")

    # Start from the midpoints between the four classes.
    search = minimize(
        evaluate_predictions,
        x0=[0.5, 1.5, 2.5],
        args=(y, oof_non_rounded),
        method='Nelder-Mead',
    )
    assert search.success, "Optimization did not converge."
    optimized_thresholds = search.x
    print('OPTIMIZED THRESHOLDS', optimized_thresholds)

    oof_tuned = threshold_Rounder(oof_non_rounded, optimized_thresholds)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)
    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    mean_test_pred = test_preds.mean(axis=1)
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': threshold_Rounder(mean_test_pred, optimized_thresholds),
    })
    return (submission, tKappa, oof_tuned, oof_non_rounded, y, optimized_thresholds)

## Masked autoencoder (MAE) functions for Sub 6

In [7]:
from functools import partial
from tkinter import E

import torch
import numpy as np
import torch.nn as nn
import pandas as pd
from timm.models.vision_transformer import Block
import numpy as np
import torch, os
from torch import nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import math
import argparse

class MaskEmbed(nn.Module):
    """Embed each scalar field of a record with a shared 1x1 convolution.

    Maps input of shape ``(B, 1, L)`` to tokens of shape ``(B, L, embed_dim)``.
    """

    def __init__(self, rec_len=25, embed_dim=64, norm_layer=None):
        super().__init__()
        self.rec_len = rec_len
        self.proj = nn.Conv1d(1, embed_dim, kernel_size=1, stride=1)
        if norm_layer:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = nn.Identity()

    def forward(self, x):
        # The unpacking doubles as a check that the input is 3-D.
        _batch, _channels, _length = x.shape
        tokens = self.proj(x).transpose(1, 2)
        return self.norm(tokens)


class ActiveEmbed(nn.Module):
    """Like ``MaskEmbed`` but applies ``sin`` to the projected values.

    Maps input of shape ``(B, 1, L)`` to tokens of shape ``(B, L, embed_dim)``.
    """

    def __init__(self, rec_len=25, embed_dim=64, norm_layer=None):
        super().__init__()
        self.rec_len = rec_len
        self.proj = nn.Conv1d(1, embed_dim, kernel_size=1, stride=1)
        if norm_layer:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = nn.Identity()

    def forward(self, x):
        # The unpacking doubles as a check that the input is 3-D.
        _batch, _channels, _length = x.shape
        tokens = torch.sin(self.proj(x)).transpose(1, 2)
        return self.norm(tokens)

def get_1d_sincos_pos_embed(embed_dim, pos, cls_token=False):
    """Build a fixed sine/cosine positional-embedding table.

    embed_dim: output dimension for each position (must be even)
    pos: number of positions M; positions 0..M-1 are encoded
    cls_token: if True, prepend an all-zero row for a class token
    out: (M, D), or (M + 1, D) when cls_token is True
    """
    assert embed_dim % 2 == 0
    half_dim = embed_dim // 2

    omega = np.arange(half_dim, dtype=np.float32)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    positions = np.arange(pos)  # (M,)
    angles = np.einsum('m,d->md', positions, omega)  # (M, D/2), outer product

    # The first half of each row holds the sines, the second half the cosines.
    table = np.concatenate([np.sin(angles), np.cos(angles)], axis=1)  # (M, D)
    if not cls_token:
        return table

    cls_row = np.zeros([1, embed_dim])
    return np.concatenate([cls_row, table], axis=0)


def adjust_learning_rate(optimizer, epoch, lr, min_lr, max_epochs, warmup_epochs):
    """Set the optimizer's learning rate: linear warm-up, then half-cycle cosine decay.

    ``epoch`` may be fractional. Param groups that carry an ``"lr_scale"``
    entry get the scheduled rate multiplied by it. Returns the unscaled rate.
    """
    if epoch < warmup_epochs:
        tmp_lr = lr * epoch / warmup_epochs
    else:
        angle = math.pi * (epoch - warmup_epochs) / (max_epochs - warmup_epochs)
        tmp_lr = min_lr + (lr - min_lr) * 0.5 * (1. + math.cos(angle))

    for group in optimizer.param_groups:
        group["lr"] = tmp_lr * group["lr_scale"] if "lr_scale" in group else tmp_lr
    return tmp_lr


def get_grad_norm_(parameters, norm_type: float = 2.0) -> torch.Tensor:
    """Return the overall ``norm_type`` norm of the gradients of ``parameters``.

    Accepts a single tensor or an iterable of tensors. Parameters without a
    gradient are ignored, and a zero tensor is returned if none has one.
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad.detach() for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if not grads:
        return torch.tensor(0.)

    device = grads[0].device
    if norm_type == float('inf'):
        return max(g.abs().max().to(device) for g in grads)

    # The norm of the per-parameter norms equals the norm of all gradients together.
    per_param = torch.stack([torch.norm(g, norm_type).to(device) for g in grads])
    return torch.norm(per_param, norm_type)


class NativeScaler:
    """Thin wrapper around ``torch.cuda.amp.GradScaler`` for one training step.

    Calling the instance runs the scaled backward pass and, when
    ``update_grad`` is true, unscales the gradients, optionally clips them,
    steps the optimizer and updates the scale factor.
    """

    state_dict_key = "amp_scaler"

    def __init__(self):
        self._scaler = torch.cuda.amp.GradScaler()

    def __call__(self, loss, optimizer, clip_grad=None, parameters=None, create_graph=False, update_grad=True):
        """Backpropagate ``loss``; return the gradient norm, or None if no step was taken."""
        self._scaler.scale(loss).backward(create_graph=create_graph)
        if not update_grad:
            # Gradient accumulation step: keep the gradients, skip the update.
            return None

        clipping = clip_grad is not None
        if clipping:
            assert parameters is not None
        # Unscale the gradients of the optimizer's params in place before measuring them.
        self._scaler.unscale_(optimizer)
        if clipping:
            norm = torch.nn.utils.clip_grad_norm_(parameters, clip_grad)
        else:
            norm = get_grad_norm_(parameters)

        self._scaler.step(optimizer)
        self._scaler.update()
        return norm

    def state_dict(self):
        return self._scaler.state_dict()

    def load_state_dict(self, state_dict):
        self._scaler.load_state_dict(state_dict)



class MAEDataset(Dataset):
    """Pairs each row of ``X`` with the row of ``M`` at the same index."""

    def __init__(self, X, M):
        self.X, self.M = X, M

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx: int):
        sample = self.X[idx]
        mask = self.M[idx]
        return sample, mask



def get_dataset(dataset : str, path : str):
    """Load a named benchmark dataset as a ``(X, y)`` pair.

    Args:
        dataset: Dataset name. CSV-backed names are read from
            ``<path>/data/<dataset>.csv`` with the last column as the target;
            ``'california'``, ``'diabetes'`` and ``'iris'`` come from
            scikit-learn.
        path: Root directory that contains the ``data`` folder.

    Returns:
        ``(X, y)``: the feature frame and the target series.

    Raises:
        ValueError: If ``dataset`` is not a recognised name.
    """
    csv_datasets = (
        'climate', 'compression', 'wine', 'yacht', 'spam', 'letter', 'credit',
        'raisin', 'bike', 'obesity', 'airfoil', 'blood', 'yeast', 'health',
        'review', 'travel',
    )
    if dataset in csv_datasets:
        df = pd.read_csv(os.path.join(path, 'data', dataset + '.csv'))
        last_col = df.columns[-1]
        return df.drop(columns=[last_col]), df[last_col]
    if dataset == 'california':
        from sklearn.datasets import fetch_california_housing
        return fetch_california_housing(as_frame=True, return_X_y=True)
    if dataset == 'diabetes':
        from sklearn.datasets import load_diabetes
        return load_diabetes(as_frame=True, return_X_y=True)
    if dataset == 'iris':
        # only for testing
        from sklearn.datasets import load_iris
        return load_iris(as_frame=True, return_X_y=True)
    # An unknown name used to fall through to `return X, y` and fail with
    # UnboundLocalError; report it explicitly instead.
    raise ValueError(f"Unknown dataset: {dataset!r}")


# Small module-level constant; note it is reassigned to 1e-8 in a later cell.
eps = 1e-6
import torch
import torch.nn as nn
import numpy as np
from torch.nn import TransformerEncoderLayer

class MaskedAutoencoder(nn.Module):
    """Transformer masked autoencoder over a fixed-length record of scalar fields.

    Each of the ``rec_len`` fields is embedded as one token. Missing or
    randomly masked tokens are zeroed and excluded from attention, and the
    decoder predicts a sigmoid-squashed value for every field.

    Args:
        rec_len: Number of fields (tokens) per record.
        embed_dim: Encoder token width.
        depth: Number of encoder layers.
        num_heads: Attention heads per encoder layer.
        decoder_embed_dim: Decoder token width.
        decoder_depth: Number of decoder layers.
        decoder_num_heads: Attention heads per decoder layer.
        mlp_ratio: Feed-forward width as a multiple of the token width.
        norm_layer: Factory for normalisation layers.
        norm_field_loss: Stored on the instance; not read by this class.
        encode_func: ``'active'`` selects ``ActiveEmbed``; anything else
            selects ``MaskEmbed``.
    """

    def __init__(self, rec_len=25, embed_dim=64, depth=4, num_heads=4,
                 decoder_embed_dim=64, decoder_depth=2, decoder_num_heads=4,
                 mlp_ratio=4., norm_layer=nn.LayerNorm, norm_field_loss=False,
                 encode_func='linear'):
        super().__init__()

        self.rec_len = rec_len
        self.embed_dim = embed_dim
        self.norm_field_loss = norm_field_loss

        # Encoder
        if encode_func == 'active':
            self.mask_embed = ActiveEmbed(rec_len, embed_dim, norm_layer)
        else:
            self.mask_embed = MaskEmbed(rec_len, embed_dim, norm_layer)

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        # Fixed (non-trainable) sin/cos table; slot 0 is for the class token.
        self.pos_embed = nn.Parameter(torch.zeros(1, rec_len + 1, embed_dim), requires_grad=False)

        encoder_layer = TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=int(embed_dim * mlp_ratio),
                        dropout=0.0, batch_first=True)
        self.blocks = nn.TransformerEncoder(encoder_layer, depth)
        self.norm = norm_layer(embed_dim)

        # Decoder
        self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
        self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_embed_dim))
        self.decoder_pos_embed = nn.Parameter(torch.zeros(1, rec_len + 1, decoder_embed_dim), requires_grad=False)

        decoder_layer = TransformerEncoderLayer(d_model=decoder_embed_dim, nhead=decoder_num_heads, dim_feedforward=int(decoder_embed_dim * mlp_ratio),
                        dropout=0.0, batch_first=True)
        self.decoder_blocks = nn.TransformerEncoder(decoder_layer, decoder_depth)
        self.decoder_norm = norm_layer(decoder_embed_dim)
        self.decoder_pred = nn.Linear(decoder_embed_dim, 1, bias=True)

        self.initialize_weights()

    def initialize_weights(self):
        """Fill the positional tables and initialise tokens and layer weights."""
        pos_embed = get_1d_sincos_pos_embed(self.pos_embed.shape[-1], self.rec_len, cls_token=True)
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))

        decoder_pos_embed = get_1d_sincos_pos_embed(self.decoder_pos_embed.shape[-1], self.rec_len, cls_token=True)
        self.decoder_pos_embed.data.copy_(torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))

        # Initialise the conv projection like a linear layer by flattening it to 2-D.
        torch.nn.init.xavier_uniform_(self.mask_embed.proj.weight.view([self.mask_embed.proj.weight.shape[0], -1]))
        torch.nn.init.normal_(self.cls_token, std=.02)
        torch.nn.init.normal_(self.mask_token, std=.02)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Xavier-initialise ``nn.Linear`` and reset ``nn.LayerNorm`` to identity."""
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def random_masking(self, x, m, mask_ratio, training=None):
        """Choose which tokens are hidden from the encoder.

        Args:
            x: Token embeddings of shape ``(N, L, D)``.
            m: Observation mask of shape ``(N, L)``; values below 1e-6 mark
                missing fields.
            mask_ratio: Fraction of tokens to hide when masking randomly.
            training: Override for ``self.training``. When false, only the
                missing fields are hidden.

        Returns:
            ``(mask, nask)``: boolean tensors of shape ``(N, L)``. ``mask`` is
            True for hidden tokens and ``nask`` is True for the tokens kept.
        """
        N, L, D = x.shape
        if training is None:
            training = self.training
        if training:
            len_keep = int(L * (1 - mask_ratio))
            noise = torch.rand(N, L, device=x.device)
            # Missing fields get the largest noise so they sort last and are
            # never among the kept tokens.
            noise[m < 1e-6] = 1
            ids_shuffle = torch.argsort(noise, dim=1)
            ids_restore = torch.argsort(ids_shuffle, dim=1)
            mask = torch.ones([N, L], device=x.device)
            mask[:, :len_keep] = 0
            mask = torch.gather(mask, dim=1, index=ids_restore)
            mask = torch.logical_or(mask, ~m.bool())
            nask = ~mask
            return mask, nask
        else:
            mask = ~m.bool()
            nask = m.bool()
            return mask, nask

    def forward_encoder(self, x, m, mask_ratio=0.5, training=None):
        """Embed the record, hide the masked tokens and run the encoder stack.

        ``x`` is the ``(B, 1, L)`` record and ``m`` its ``(B, L)`` observation
        mask. Returns ``(tokens, mask, nask)``, where ``tokens`` includes the
        leading class token.
        """
        x = self.mask_embed(x)
        x = x + self.pos_embed[:, 1:, :]
        mask, nask = self.random_masking(x, m, mask_ratio, training)
        x = x * (~mask.unsqueeze(-1)).float()
        cls_token = self.cls_token + self.pos_embed[:, :1, :]
        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        # The class token is always attended to, hence the zero in front.
        attn_mask = torch.cat((torch.zeros(x.shape[0], 1, device=x.device), mask), dim=1)
        x = self.blocks(src=x, src_key_padding_mask=attn_mask.bool())
        x = self.norm(x)
        return x, mask, nask

    def forward_decoder(self, x, mask):
        """Decode the encoder tokens into one value in (0, 1) per field.

        Returns a tensor of shape ``(B, L, 1)``; the class token is dropped.
        """
        x = self.decoder_embed(x)
        x = x + self.decoder_pos_embed
        mask_with_cls = torch.cat((torch.zeros(x.shape[0], 1, device=x.device), mask), dim=1)
        # Run the decoder stack. This used to call the encoder stack
        # (`self.blocks`), which left `decoder_blocks` unused and failed
        # whenever decoder_embed_dim differed from embed_dim.
        x = self.decoder_blocks(src=x, src_key_padding_mask=mask_with_cls.bool())

        x = self.decoder_norm(x)
        x = self.decoder_pred(x)
        x = x[:, 1:, :].sigmoid()
        return x

    def forward_loss(self, data, pred, mask, nask):
        """Sum of the mean squared errors over hidden and over kept positions."""
        target = data.squeeze(dim=1)
        loss = (pred.squeeze(dim=2) - target) ** 2
        loss = (loss * mask).sum() / (mask.sum() + 1e-6) + (loss * nask).sum() / (nask.sum() + 1e-6)
        return loss

    def forward(self, data, m, mask_ratio=0.5, training=None):
        """Run encoder, decoder and loss; return ``(loss, pred, mask, nask)``."""
        x, mask, nask = self.forward_encoder(data, m, mask_ratio, training)
        pred = self.forward_decoder(x, mask)
        loss = self.forward_loss(data, pred, mask, nask)
        return loss, pred, mask, nask

In [8]:
def eval(model, val_loader, lbl_idx=None):
    """Average a per-batch score of ``model`` over ``val_loader``.

    With ``lbl_idx`` set, each batch is reduced to the rows whose label field
    is observed, the label is hidden from the model, and the batch score is
    the negative QWK between the true and predicted labels (both scaled by 3
    and rounded). Without it, the batch score is the model's own loss.

    The divisor starts at 0.01, so an empty loader yields 0.0 rather than a
    division by zero.
    """
    total_loss = 0.0
    cnt = 1e-2
    for samples, masks in val_loader:
        with torch.no_grad():
            samples = samples.unsqueeze(dim=1)
            samples = samples.to(device, non_blocking=True)
            masks = masks.to(device, non_blocking=True)

            if lbl_idx is None:
                loss, _, _, _ = model(samples, masks, mask_ratio=0.5)
            else:
                input_samples = samples.clone()[masks[:, lbl_idx].bool()]
                input_masks = masks.clone()[masks[:, lbl_idx].bool()]
                if len(input_samples) == 0:
                    continue

                # Save the true label, then blank it in both the data and the mask.
                target = input_samples[:, 0, lbl_idx].detach().clone().reshape(-1)
                input_samples[:, :, lbl_idx] = 0.0
                input_masks[:, lbl_idx] = 0

                _, pred, _, _ = model(input_samples, input_masks, 0.5, False)
                pred = pred[:, lbl_idx, 0].reshape(-1)

                true_classes = (target.cpu().numpy() * 3.).round(0).astype(int)
                pred_classes = (pred.cpu().numpy() * 3.).round(0).astype(int)
                loss = -quadratic_weighted_kappa(true_classes, pred_classes)

        total_loss += loss.item()
        cnt += 1
    return total_loss / cnt


# Reassigns the module-level eps (set to 1e-6 in an earlier cell); ReMasker
# reads this value when normalising and as the LayerNorm eps.
eps = 1e-8
# Device that eval() and ReMasker move their tensors and model to.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



from argparse import Namespace
# Default hyper-parameters for ReMasker. ReMasker.__init__ does not read
# `device`, `seed`, `overwrite` or `pin_mem`.
remasker_args = Namespace(
    batch_size=64,
    max_epochs= 600,
    accum_iter=1,
    mask_ratio=0.5,
    embed_dim=32,
    depth=6,
    decoder_depth=4,
    num_heads=4,
    mlp_ratio=4.0,
    encode_func='linear',
    norm_field_loss=False,
    weight_decay=0.05,
    lr=None, blr=0.001,
    min_lr=1e-05,
    warmup_epochs=40,
    device='cuda', seed=SEED, overwrite=True, pin_mem=True
)


class ReMasker:
    """Trains a ``MaskedAutoencoder`` and uses it to predict one label column.

    Inputs are min-max normalised per column with statistics learned in
    ``fit``; NaN marks a missing value. Predictions for the label column are
    multiplied by 3 on the way out.
    """

    def __init__(self, args=remasker_args):
        """Copy the hyper-parameters from ``args`` (a ``Namespace``-like object)."""
        self.batch_size = args.batch_size
        self.accum_iter = args.accum_iter
        self.min_lr = args.min_lr
        self.norm_field_loss = args.norm_field_loss
        self.weight_decay = args.weight_decay
        self.lr = args.lr
        self.blr = args.blr
        self.warmup_epochs = args.warmup_epochs
        self.model = None
        self.norm_parameters = None

        self.embed_dim = args.embed_dim
        self.depth = args.depth
        self.decoder_depth = args.decoder_depth
        self.num_heads = args.num_heads
        self.mlp_ratio = args.mlp_ratio
        self.max_epochs = args.max_epochs
        self.mask_ratio = args.mask_ratio
        self.encode_func = args.encode_func

    def fit(self, X_raw: torch.Tensor, X_val=None, lbl_idx=None):
        """Train the autoencoder and keep the epoch with the best validation QWK.

        Args:
            X_raw: 2-D float tensor of training rows with NaN for missing
                values. It is cloned, so the caller's tensor is not modified.
            X_val: Validation rows in the same layout. It is passed to
                ``evaluate`` after every epoch, so it is needed in practice
                despite the ``None`` default.
            lbl_idx: Column index of the label. When set, an extra MSE term
                trains the model to predict the label from the other fields.

        Returns:
            ``self``, holding the model from the best validation epoch.

        Raises:
            RuntimeError: If the training loss becomes non-finite.
        """
        X = X_raw.clone()

        # Parameters
        dim = len(X[0, :])

        X = X.cpu()

        min_val = np.zeros(dim)
        max_val = np.zeros(dim)

        # Per-column min-max normalisation that ignores NaNs.
        for i in range(dim):
            min_val[i] = np.nanmin(X[:, i])
            max_val[i] = np.nanmax(X[:, i])
            X[:, i] = (X[:, i] - min_val[i]) / (max_val[i] - min_val[i] + eps)

        self.norm_parameters = {"min": min_val, "max": max_val}

        # Set missing: M is 1 where a value is observed and 0 where it is NaN.
        M = (~torch.isnan(X)).float().to(device)

        X = torch.nan_to_num(X)
        X = X.to(device)

        self.model = MaskedAutoencoder(
            rec_len=dim,
            embed_dim=self.embed_dim,
            depth=self.depth,
            num_heads=self.num_heads,
            decoder_embed_dim=self.embed_dim,
            decoder_depth=self.decoder_depth,
            decoder_num_heads=self.num_heads,
            mlp_ratio=self.mlp_ratio,
            norm_layer=partial(nn.LayerNorm, eps=eps),
            norm_field_loss=self.norm_field_loss,
            encode_func=self.encode_func
        )
        self.model.to(device)
        eff_batch_size = self.batch_size * self.accum_iter
        if self.lr is None:
            # Scale the base learning rate linearly with the effective batch size.
            self.lr = self.blr * eff_batch_size / 64
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, betas=(0.9, 0.95))
        loss_scaler = NativeScaler()

        dataset = MAEDataset(X, M)
        # shuffle=True makes DataLoader sample randomly without needing the
        # RandomSampler name, which is not imported in the visible source.
        dataloader = DataLoader(
            dataset, shuffle=True,
            batch_size=self.batch_size,
        )

        best_loss = 1e9
        best_model = copy.deepcopy(self.model)
        for epoch in range(self.max_epochs):
            self.model.train()

            optimizer.zero_grad()
            total_loss = 0
            lbl_loss = 0.

            step = 0
            for step, (samples, masks) in enumerate(dataloader):
                if step % self.accum_iter == 0:
                    adjust_learning_rate(optimizer, step / len(dataloader) + epoch, self.lr, self.min_lr,
                                         self.max_epochs, self.warmup_epochs)

                samples = samples.unsqueeze(dim=1)
                samples = samples.to(device, non_blocking=True)
                masks = masks.to(device, non_blocking=True)

                with torch.cuda.amp.autocast():
                    loss, _, _, _ = self.model(samples, masks, mask_ratio=self.mask_ratio)

                    if lbl_idx is not None:
                        labelled = masks[:, lbl_idx].bool()
                        input_samples = samples.clone()[labelled]
                        input_masks = masks.clone()[labelled]
                        # A batch without any labelled row is skipped entirely.
                        if len(input_samples) == 0: continue

                        # Hide the label from the model and ask it to predict it.
                        target = input_samples[:, 0, lbl_idx].detach().clone().reshape(-1)
                        input_samples[:, :, lbl_idx] = 0.0
                        input_masks[:, lbl_idx] = 0

                        _, pred, _, _ = self.model(input_samples, input_masks, 0.5, False)
                        pred = pred[:, lbl_idx, 0].reshape(-1)
                        lbl_weight = 1
                        label_mse = F.mse_loss(pred, target)
                        loss += lbl_weight * label_mse
                        lbl_loss += label_mse.item()

                    loss_value = loss.item()
                    total_loss += loss_value
                if not math.isfinite(loss_value):
                    # Raise a normal exception rather than exiting the process.
                    raise RuntimeError("Loss is {}, stopping training".format(loss_value))

                loss /= self.accum_iter
                loss_scaler(loss, optimizer, parameters=self.model.parameters(),
                            update_grad=(step + 1) % self.accum_iter == 0)

                if (step + 1) % self.accum_iter == 0:
                    optimizer.zero_grad()
            total_loss = (total_loss / (step + 1))
            self.model.eval()
            val_loss = self.evaluate(X_val, lbl_idx)
            if val_loss <= best_loss:
                best_loss = val_loss
                best_model = copy.deepcopy(self.model)
            if (epoch + 1) % max(1, self.max_epochs // 10) == 0 or epoch == 0:
                lbl_loss = lbl_loss / (step + 1)
                print("Epoch: %d, train;val;best qwk: %.4f;%.4f;%.4f, loss: %.4f, lbl_loss: %.4f, val_loss: %.4f" %
                    (epoch+1, -self.evaluate(X_raw, lbl_idx), -val_loss, -best_loss, total_loss, lbl_loss, val_loss)
                )

        self.model = best_model
        print(f'Loaded best model with loss={best_loss:.4f}')
        return self

    def evaluate(self, X_raw: torch.Tensor, lbl_idx):
        """Return the negative QWK of the label predictions on the labelled rows.

        Rows whose label is NaN are dropped. The label is blanked on a copy
        before predicting, so the caller's tensor is not modified.
        """
        keep_indices = torch.where(~X_raw[:, lbl_idx].isnan())[0]
        # Indexing with a tensor copies, so the assignment below is local.
        X_raw = X_raw[keep_indices]
        gt = X_raw[:, lbl_idx].cpu().numpy().round(0).astype(int)
        X_raw[:, lbl_idx] = float('nan')
        yp = self.predict(X_raw, lbl_idx)
        yp = yp.cpu().numpy().round(0).astype(int)
        return -quadratic_weighted_kappa(gt, yp)

    def predict(self, X_raw: torch.Tensor, lbl_idx, bs=None):
        """Predict column ``lbl_idx`` for every row of ``X_raw``.

        Args:
            X_raw: 2-D array-like or tensor with NaN for missing values.
            lbl_idx: Index of the column to predict.
            bs: Batch size; defaults to the training batch size.

        Returns:
            1-D tensor on ``device`` holding the model output for the label
            column multiplied by 3.
        """
        X_raw = torch.as_tensor(X_raw, dtype=torch.float32)

        # Normalize the input data with the statistics learned in fit().
        min_val = self.norm_parameters["min"]
        max_val = self.norm_parameters["max"]
        X = X_raw.clone()
        for i in range(X.shape[1]):
            X[:, i] = (X[:, i] - min_val[i]) / (max_val[i] - min_val[i] + eps)

        M = (1 - (1 * torch.isnan(X))).float().to(device)

        X = torch.nan_to_num(X)
        X = X.to(device)

        if bs is None: bs = self.batch_size
        # Prepare DataLoader
        dataset = MAEDataset(X, M)
        dataloader = DataLoader(dataset, batch_size=bs, shuffle=False)

        # Ensure model is in evaluation mode
        self.model.eval()

        # Tensor to hold predictions
        predictions = torch.zeros(0).to(device)

        with torch.no_grad():
            for batch_samples, batch_masks in dataloader:
                # Prepare input for the model
                batch_samples = batch_samples.unsqueeze(dim=1).to(device)
                batch_masks = batch_masks.to(device)

                # Forward pass with training=False
                _, pred, _, _ = self.model(batch_samples, batch_masks, mask_ratio=0.5, training=False)

                pred = pred.squeeze(dim=2)[:, lbl_idx]

                predictions = torch.cat((predictions, pred), 0)

        return predictions * 3.

    def fit_transform(self, X: torch.Tensor) -> torch.Tensor:
        """Fit on ``X`` and return the imputed dataset.

        Args:
            X: Object exposing ``.values`` (e.g. a DataFrame) with missing
                values.
        Returns:
            Xhat: The imputed dataset as a NumPy array.
        """
        # NOTE(review): this class defines no `transform` method, and `fit`
        # needs X_val/lbl_idx for its per-epoch evaluation, so this method
        # cannot run as written — confirm whether it is still needed.
        X = torch.tensor(X.values, dtype=torch.float32)
        return self.fit(X).transform(X).detach().cpu().numpy()

In [9]:
def random_extend(arr, k):
    """Return ``k`` rows of ``arr`` picked by concatenating random permutations of its indices.

    Every full pass over a permutation uses each row exactly once, so rows are repeated
    as evenly as possible when ``k > len(arr)``.

    Args:
        arr: Indexable container (NumPy array, torch tensor, ...) supporting ``len`` and
            integer-array indexing.
        k: Number of rows to return.

    Returns:
        ``arr`` indexed by the first ``k`` entries of the concatenated permutations.
        An empty ``arr`` yields an empty result.
    """
    n = len(arr)
    # The original always drew exactly 10 permutations, which silently returned fewer than
    # k rows when k > 10 * len(arr). Keep 10 as the minimum so the random stream (and thus
    # the result for a given NumPy seed) is unchanged wherever the original was correct.
    n_perms = max(10, -(-k // n)) if n else 10
    indices = np.concatenate([np.random.permutation(n) for _ in range(n_perms)])[:k]
    return arr[indices]

In [10]:
def PerformImpute(imputer_args):
    """Cross-validate a ReMasker imputer on the ``sii`` column and build a submission.

    Reads the notebook globals ``X_raw``, ``X_tensor_test``, ``num_folds``, ``full_df``
    and ``SEED``. For every fold the held-out labels are masked (set to NaN) in a copy of
    ``X_raw``, the copy is resampled to 9000 rows with ``random_extend``, a fresh
    ``ReMasker`` is fitted, and the label column is predicted for the training rows, the
    held-out rows and the test tensor. Rounding thresholds are then tuned on the
    out-of-fold predictions with Nelder-Mead and applied to the fold-averaged test
    predictions.

    Args:
        imputer_args: Configuration object passed unchanged to ``ReMasker``.

    Returns:
        DataFrame with the ``id`` column of ``sample_submission.csv`` and the tuned
        ``sii`` predictions.

    Raises:
        AssertionError: If the threshold optimization does not converge.
    """
    train_S = []
    test_S = []

    KF = KFold(n_splits=num_folds, shuffle=True, random_state=SEED)

    oof_non_rounded = []
    oof_gt = []
    test_preds = np.zeros((len(X_tensor_test), num_folds))
    # Folds that actually produced test predictions; skipped folds must not be averaged in.
    completed_folds = []

    lbl_idx = full_df.columns.get_loc('sii')

    # Only rows with a known label take part in the fold split.
    lbled_indices = torch.where(~X_raw[:, lbl_idx].isnan())[0]

    # The progress bar length is the number of folds of THIS splitter (num_folds), not the
    # unrelated notebook-level n_splits.
    pbar = tqdm(KF.split(lbled_indices), desc="Training Folds", total=num_folds)

    for fold, (train_idx_idx, test_idx_idx) in enumerate(pbar):
        train_idx = lbled_indices[train_idx_idx]
        test_idx = lbled_indices[test_idx_idx]

        # Hide the held-out labels from the training copy.
        X_train = X_raw.clone()
        X_train[test_idx, lbl_idx] = float('nan')

        X_val = X_raw[test_idx].clone()

        X_train = random_extend(X_train, 9000)

        train_nonna_indices = torch.where(~X_train[:, lbl_idx].isnan())[0]
        val_nonna_indices = torch.where(~X_val[:, lbl_idx].isnan())[0]
        if len(train_nonna_indices) == 0 or len(val_nonna_indices) == 0:
            continue

        imputer = ReMasker(imputer_args)
        imputer.fit(X_train, X_val, lbl_idx)

        y_train_ = X_train[train_nonna_indices, lbl_idx].numpy().astype(int)
        y_val_ = X_val[val_nonna_indices, lbl_idx].numpy().astype(int)

        # Blank the label column so the model has to reconstruct it.
        X_train[:, lbl_idx] = float('nan')
        X_val[:, lbl_idx] = float('nan')

        y_train_pred = imputer.predict(X_train[train_nonna_indices], lbl_idx).cpu().detach().numpy()
        y_val_pred = imputer.predict(X_val[val_nonna_indices], lbl_idx).cpu().detach().numpy()
        y_test_pred = imputer.predict(X_tensor_test, lbl_idx).cpu().detach().numpy()

        oof_non_rounded += [y_val_pred]
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_gt += [y_val_]

        train_kappa = quadratic_weighted_kappa(y_train_, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val_, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)

        test_preds[:, fold] = y_test_pred
        completed_folds.append(fold)

        clear_output(wait=True)
        pbar.set_description_str(
          "Fold %d, Train MSE: %.4f, Val MSE: %.4f, Train QWK: %.4f, Val QWK: %.4f" % (
              fold + 1,
              ((y_train_pred - y_train_) ** 2 / 9.).mean(),
              ((y_val_pred - y_val_) ** 2 / 9.).mean(),
              train_kappa,
              val_kappa
          )
        )

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    oof_non_rounded = np.concatenate(oof_non_rounded)
    oof_gt = np.concatenate(oof_gt)
    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(oof_gt, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."

    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(oof_gt, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    # Average only the folds that ran; a skipped fold would otherwise contribute a column
    # of zeros and pull every test prediction towards 0.
    tpm = test_preds[:, completed_folds].mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)

    sample_sub_df = pd.read_csv('../input/child-mind-institute-problematic-internet-use/sample_submission.csv')
    submission = pd.DataFrame({
        'id': sample_sub_df['id'],
        'sii': tpTuned
    })

    return submission

# Define features

## Normal features

In [11]:
# Load the tabular competition data and the sample submission.
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

# Every test column except the identifier is a candidate feature.
total_features = list(test.columns)
total_features.remove('id')

# Season columns; later cells treat these as categorical.
cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

In [12]:
# Feature list without the season columns. The names after
# 'PreInt_EduHx-computerinternet_hoursday' are not raw dataset columns.
# NOTE(review): presumably they are created by feature_engineering() - confirm.
noseason_features = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW','BMI_PHR']
# Sanity check on the number of selected features.
print(len(noseason_features))

64


## Loading timeseries

In [13]:
# Build the time-series tables for train and test from the parquet directories.
# Later cells rely on each result having an 'id' column.
train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

100%|██████████| 996/996 [01:36<00:00, 10.35it/s]
100%|██████████| 2/2 [00:00<00:00,  6.65it/s]


In [14]:
# Drop the identifier so only the time-series feature columns remain, then fit the
# autoencoder (and its scaler) on the training table only.
df_train = train_ts.drop('id', axis=1)
df_test = test_ts.drop('id', axis=1)
autoencoder, scaler = perform_autoencoder(df_train, encoding_dim=60, epochs=100, batch_size=32)

Epoch [50/100], Loss: 1.4655]
Epoch [100/100], Loss: 1.4492]


In [15]:
# Number of time-series feature columns fed to the autoencoder.
df_train.shape[1]

96

In [16]:
# Encode train and test with the autoencoder/scaler fitted on the training table.
train_ts_encoded = encode_data(autoencoder, scaler, df_train)
test_ts_encoded = encode_data(autoencoder, scaler, df_test)
# NOTE(review): only the test frame has its index reset; the 'id' assignment in the next
# cell aligns on index, so confirm train_ts_encoded already carries train_ts's index.
test_ts_encoded.reset_index(inplace=True, drop=True)

In [17]:
# Display the encoded test time series (Enc_1 ... Enc_60).
test_ts_encoded

Unnamed: 0,Enc_1,Enc_2,Enc_3,Enc_4,Enc_5,Enc_6,Enc_7,Enc_8,Enc_9,Enc_10,Enc_11,Enc_12,Enc_13,Enc_14,Enc_15,Enc_16,Enc_17,Enc_18,Enc_19,Enc_20,Enc_21,Enc_22,Enc_23,Enc_24,Enc_25,Enc_26,Enc_27,Enc_28,Enc_29,Enc_30,Enc_31,Enc_32,Enc_33,Enc_34,Enc_35,Enc_36,Enc_37,Enc_38,Enc_39,Enc_40,Enc_41,Enc_42,Enc_43,Enc_44,Enc_45,Enc_46,Enc_47,Enc_48,Enc_49,Enc_50,Enc_51,Enc_52,Enc_53,Enc_54,Enc_55,Enc_56,Enc_57,Enc_58,Enc_59,Enc_60
0,0.0,5.337389,7.215616,4.216683,0.0,5.344681,0.0,1.045033,0.898455,2.708214,3.399186,2.038562,0.0,0.0,3.918493,1.856204,0.0,0.471048,9.064348,2.624146,2.383941,6.034007,6.280801,0.0,0.0,5.305665,0.0,0.0,7.107492,5.810652,4.795111,1.981038,0.0,0.0,0.0,5.866778,0.0,2.708674,0.0,0.0,8.812943,0.0,6.403038,7.655363,4.856868,4.56655,0.0,7.363611,0.852052,2.31389,0.0,0.0,0.27512,0.0,5.269929,3.051205,6.881392,0.0,0.0,0.0
1,6.864849,0.490974,0.685794,0.0,0.0,0.0,0.0,3.844508,5.394163,2.186373,2.682933,4.056297,0.0,0.0,3.080273,5.332508,2.287547,0.0,5.950637,1.646377,0.0,0.0,0.0,0.0,2.572919,0.0,0.0,7.725673,0.0,3.378885,4.879403,3.119649,0.0,0.0,0.0,5.350994,0.0,3.392251,0.0,0.0,0.0,0.0,0.0,0.614659,0.250168,2.762118,0.0,2.877998,0.0,0.524518,4.918088,0.0,0.952121,0.0,6.409425,6.154183,6.145087,4.371527,9.287868,0.0


In [18]:
# Re-attach the participant ids so the encoded features can be merged with the tabular data.
# The assignment aligns on the DataFrame index, not on row position.
train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

## Time-series feature columns

In [19]:
# Column names of the raw and of the autoencoded time-series features (identifier excluded).
time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")
time_encoded_cols = train_ts_encoded.columns.tolist()
time_encoded_cols.remove("id")

# Submission 1

In [20]:
!pip -q install /kaggle/input/pytorchtabnet/pytorch_tabnet-4.1.0-py3-none-any.whl

In [21]:
from pytorch_tabnet.tab_model import TabNetRegressor

In [22]:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from pytorch_tabnet.callbacks import Callback
import os
import torch
from pytorch_tabnet.callbacks import Callback

class TabNetWrapper(BaseEstimator, RegressorMixin):
    """scikit-learn compatible wrapper around ``TabNetRegressor``.

    Missing values are median-imputed before the data reaches TabNet, and 20% of the
    training data is held out as the validation set that drives early stopping and
    checkpointing.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to ``TabNetRegressor``.
    """

    def __init__(self, **kwargs):
        self.model = TabNetRegressor(**kwargs)
        self.kwargs = kwargs
        self.imputer = SimpleImputer(strategy='median')
        self.best_model_path = 'best_tabnet_model.pt'

    def get_params(self, deep=True):
        """Return the TabNet keyword arguments as the estimator's parameters.

        ``BaseEstimator.get_params`` ignores ``**kwargs`` in ``__init__``, so without this
        override ``sklearn.base.clone`` (used by ``VotingRegressor``) would rebuild the
        wrapper with no arguments and silently drop every configured hyper-parameter.
        """
        return dict(self.kwargs)

    def set_params(self, **params):
        """Update the TabNet keyword arguments and rebuild the underlying model."""
        if not params:
            return self
        self.kwargs.update(params)
        self.model = TabNetRegressor(**self.kwargs)
        return self

    def fit(self, X, y):
        """Impute ``X``, train TabNet with a 80/20 hold-out split and keep the best checkpoint.

        Args:
            X: Feature matrix (may contain NaN).
            y: Targets; objects exposing ``.values`` are converted to their array.

        Returns:
            ``self``.
        """
        X_imputed = self.imputer.fit_transform(X)

        if hasattr(y, 'values'):
            y = y.values

        X_train, X_valid, y_train, y_valid = train_test_split(
            X_imputed, 
            y, 
            test_size=0.2,
            random_state=42
        )

        self.model.fit(
            X_train=X_train,
            y_train=y_train.reshape(-1, 1),
            eval_set=[(X_valid, y_valid.reshape(-1, 1))],
            eval_name=['valid'],
            eval_metric=['mse'],
            max_epochs=200,
            patience=20,
            batch_size=1024,
            virtual_batch_size=128,
            num_workers=0,
            drop_last=False,
            callbacks=[
                TabNetPretrainedModelCheckpoint(
                    filepath=self.best_model_path,
                    monitor='valid_mse',
                    mode='min',
                    save_best_only=True,
                    verbose=True
                )
            ]
        )

        # TabNet's save_model() writes "<path>.zip", so checking only the bare path never
        # finds the checkpoint. Try the archive name first, then the bare path.
        for checkpoint in (self.best_model_path + '.zip', self.best_model_path):
            if os.path.isfile(checkpoint):
                self.model.load_model(checkpoint)
                os.remove(checkpoint)
                break

        return self

    def predict(self, X):
        """Impute ``X`` with the fitted imputer and return flat TabNet predictions."""
        X_imputed = self.imputer.transform(X)
        return self.model.predict(X_imputed).flatten()

    def __deepcopy__(self, memo):
        """Deep-copy every attribute into a new instance without calling ``__init__``."""
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            # The notebook imports the ``copy`` module, not a bare ``deepcopy`` name.
            setattr(result, k, copy.deepcopy(v, memo))
        return result

# Default TabNet configuration, passed as keyword arguments to TabNetWrapper.
TabNet_Params = {
    'n_d': 64,              # Width of the decision prediction layer
    'n_a': 64,              # Width of the attention embedding for each step
    'n_steps': 5,           # Number of steps in the architecture
    'gamma': 1.5,           # Coefficient for feature selection regularization
    'n_independent': 2,     # Number of independent GLU layer in each GLU block
    'n_shared': 2,          # Number of shared GLU layer in each GLU block
    'lambda_sparse': 1e-4,  # Sparsity regularization
    'optimizer_fn': torch.optim.Adam,
    'optimizer_params': dict(lr=2e-2, weight_decay=1e-5),
    'mask_type': 'entmax',
    'scheduler_params': dict(mode="min", patience=10, min_lr=1e-5, factor=0.5),
    'scheduler_fn': torch.optim.lr_scheduler.ReduceLROnPlateau,
    'verbose': 1,
    # Use the GPU when one is visible to torch, otherwise fall back to CPU.
    'device_name': 'cuda' if torch.cuda.is_available() else 'cpu'
}

class TabNetPretrainedModelCheckpoint(Callback):
    """TabNet callback that saves the model when the monitored metric improves.

    Args:
        filepath: Path passed to the trainer's ``save_model``.
        monitor: Key looked up in the epoch ``logs`` (for example ``'valid_mse'``).
        mode: ``'min'`` if lower is better, ``'max'`` if higher is better.
        save_best_only: If true, save only on improvement; otherwise save after every
            epoch in which the monitored metric is available.
        verbose: If truthy, print a message on every improvement.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, filepath, monitor='val_loss', mode='min', 
                 save_best_only=True, verbose=1):
        super().__init__()  # Initialize parent class
        # An unknown mode used to disable the callback silently (nothing ever "improved").
        if mode not in ('min', 'max'):
            raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")
        self.filepath = filepath
        self.monitor = monitor
        self.mode = mode
        self.save_best_only = save_best_only
        self.verbose = verbose
        self.best = float('inf') if mode == 'min' else -float('inf')

    def on_train_begin(self, logs=None):
        self.model = self.trainer  # Use trainer itself as model

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            # Metric not reported this epoch: nothing to compare against.
            return

        # Check if current metric is better than best
        if self.mode == 'min':
            improved = current < self.best
        else:
            improved = current > self.best

        if improved:
            if self.verbose:
                print(f'\nEpoch {epoch}: {self.monitor} improved from {self.best:.4f} to {current:.4f}')
            self.best = current

        # Previously save_best_only=False meant "never save"; now it saves every epoch.
        if improved or not self.save_best_only:
            self.model.save_model(self.filepath)  # Save the entire model

In [23]:
# Submission 1 data: tabular features left-joined with the autoencoded time series.
# Rows without a time series keep NaN in the Enc_* columns.
train_sub1 = pd.merge(train, train_ts_encoded, how="left", on='id')
test_sub1 = pd.merge(test, test_ts_encoded, how="left", on='id')
# Keep only the rows that have a target label.
train_sub1 = train_sub1.dropna(subset='sii')

In [24]:
# NOTE(review): X_sub1 is the full frame, still containing 'sii' and 'id'; presumably
# TrainML_Sub1 separates features from the target itself - confirm.
X_sub1 = train_sub1
y_sub1 = train_sub1['sii']

In [25]:
!pip -q install optuna

import optuna

In [26]:
def objective_sub1(trial):
    """Optuna objective for submission 1.

    Samples CatBoost, XGBoost and LightGBM hyper-parameters from ``trial``, combines the
    three boosters with a fixed-configuration TabNet in a weighted ``VotingRegressor`` and
    evaluates it with ``TrainML_Sub1`` on the notebook globals ``X_sub1``, ``y_sub1`` and
    ``test_sub1``.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        The validation score reported by ``TrainML_Sub1`` (second element of its result).
    """
    CatBoost_Params = {
        'learning_rate': trial.suggest_float('catboost_learning_rate', 1e-3, 0.3, log=True),
        'depth': trial.suggest_int('catboost_depth', 4, 10),
        'random_seed': SEED,
        'verbose': 0,
        'l2_leaf_reg': trial.suggest_float('catboost_l2_leaf_reg', 0.01, 10.0, log=True),
        # step is passed by keyword: positional use is deprecated in Optuna 3.
        'iterations': trial.suggest_int('catboost_iterations', 100, 400, step=10),
        #'task_type': 'GPU',  
        #'devices': '0'      
    }
    XGB_Params = {
        'n_estimators': trial.suggest_int('xgb_n_estimators', 500, 1500, step=100),
        'max_depth': trial.suggest_int('xgb_max_depth', 1, 10),
        'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.1, log=True),
        'subsample': trial.suggest_float('xgb_subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.05, 1.0),
        'gamma': trial.suggest_float('xgb_gamma', 1e-2, 1.0),
        'min_child_weight': trial.suggest_int('xgb_min_child_weight', 1, 100),
        'eval_metric': 'rmse',
        'objective': 'reg:squarederror',
        #'tree_method': 'gpu_hist',
        #'predictor': 'gpu_predictor',
        #'gpu_id': 0
    }
    Params = {
        'max_depth': trial.suggest_int('lightgbm_max_depth', 3, 12),  # Avoid overly shallow or deep trees
        'min_data_in_leaf': trial.suggest_int('lightgbm_min_data_in_leaf', 5, 50),  # Balance between overfitting and splits
        'num_leaves': trial.suggest_int('lightgbm_num_leaves', 16, 256),  # Limit complexity to avoid splits failing
        'learning_rate': trial.suggest_float('lightgbm_learning_rate', 0.01, 0.1),
        'feature_fraction': trial.suggest_float('lightgbm_feature_fraction', 0.7, 1.0),  # Allow feature subsampling tuning
        'bagging_fraction': trial.suggest_float('lightgbm_bagging_fraction', 0.7, 1.0),  # Improve generalization
        'bagging_freq': trial.suggest_int('lightgbm_bagging_freq', 1, 5),
        'lambda_l1': trial.suggest_float('lightgbm_lambda_l1', 0.0, 10.0),  # Regularization tuning
        'lambda_l2': trial.suggest_float('lightgbm_lambda_l2', 0.0, 10.0),
        'min_gain_to_split': trial.suggest_float('lightgbm_min_gain_to_split', 0.0, 0.1),  # Ensure splits happen
        #'device_type': 'gpu',
        #'gpu_device_id': 0,
        'verbosity': -1  
    }
    # TabNet is not tuned; this local dict shadows the notebook-level TabNet_Params and
    # differs from it only in 'verbose'.
    TabNet_Params = {
        'n_d': 64,
        'n_a': 64,
        'n_steps': 5,
        'gamma': 1.5,
        'n_independent': 2,
        'n_shared': 2,
        'lambda_sparse': 1e-4,
        'optimizer_fn': torch.optim.Adam,
        'optimizer_params': dict(lr=2e-2, weight_decay=1e-5),
        'mask_type': 'entmax',
        'scheduler_params': dict(mode="min", patience=10, min_lr=1e-5, factor=0.5),
        'scheduler_fn': torch.optim.lr_scheduler.ReduceLROnPlateau,
        'verbose': -1,
        'device_name': 'cuda' if torch.cuda.is_available() else 'cpu'
    }
    LightGBM_Model = LGBMRegressor(**Params)
    XGB_Model = XGBRegressor(**XGB_Params)
    CatBoost_Model = CatBoostRegressor(**CatBoost_Params)
    TabNet_Model = TabNetWrapper(**TabNet_Params) 
    voting_model = VotingRegressor(estimators=[
        ('lightgbm', LightGBM_Model),
        ('xgboost', XGB_Model),
        ('catboost', CatBoost_Model),
        ('tabnetModel', TabNet_Model),
    ], weights=[4.0, 4.0, 4.0, 5.0])

    # Only the validation score matters to the study; the other outputs are discarded.
    _, val_score, _, _, _, _ = TrainML_Sub1(voting_model, X_sub1, y_sub1, test_sub1)
    return val_score

In [27]:
# study_sub1 = optuna.create_study(direction='maximize')
# study_sub1.optimize(objective_sub1, n_trials=2)

In [28]:
# best_params_sub1 = study_sub1.best_params
# print("Best hyperparameters:", best_params_sub1)

# Hard-coded hyper-parameters for submission 1 (the Optuna study cells above are commented out).
CatBoost_Best_Params = {
    'learning_rate': 0.0021172579310639343,
    'depth': 6,
    'iterations': 130,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': 0.32557701990001503,
}

XGB_Best_Params = {
    'n_estimators': 700,
    'max_depth': 4,
    'learning_rate': 0.03325152156380898,
    'subsample': 0.25295047248406266,
    'colsample_bytree': 0.9760859719849787,
    'gamma': 0.20085951790463402,
    'min_child_weight': 11,
    'eval_metric': 'rmse',
    'objective': 'reg:squarederror',
}

LightGBM_Best_Params = {
    'max_depth': 3,
    'min_data_in_leaf': 40,
    'num_leaves': 190,
    'learning_rate': 0.05107368421432176,
    'feature_fraction': 0.9918350138636185,
    'bagging_fraction': 0.9331400899763774,
    'bagging_freq': 1,
    'lambda_l1': 9.49641646280519,
    'lambda_l2': 2.446305429623661,
    'min_gain_to_split': 0.05262124930522051,
    'verbosity': -1
}

catboost_model = CatBoostRegressor(**CatBoost_Best_Params)
xgb_model = XGBRegressor(**XGB_Best_Params)
lightgbm_model = LGBMRegressor(**LightGBM_Best_Params)

# Equal-weight ensemble of the three boosters (TabNet is not included here).
final_voting_model = VotingRegressor(estimators=[
    ('lightgbm', lightgbm_model),
    ('xgboost', xgb_model),
    ('catboost', catboost_model),
], weights=[4.0, 4.0, 4.0])

# NOTE(review): X and y are not used in this cell; confirm whether any later cell needs them.
X = train.drop(['sii'], axis=1)
y = train['sii']

In [29]:
# Train and cross-validate submission 1.
# NOTE(review): only lightgbm_model is evaluated here, not final_voting_model - confirm
# this is intended.
submission1, val_score_sub1, _, _, _, _ = TrainML_Sub1(lightgbm_model, X_sub1, y_sub1, test_sub1)

# The label previously said "sub2" although this is the submission-1 score.
print("Val score sub1 with best parameters:", val_score_sub1)

Training Folds: 100%|██████████| 5/5 [00:10<00:00,  2.10s/it]

Mean Train QWK --> 0.4516
Mean Validation QWK ---> 0.3644





OPTIMIZED THRESHOLDS [0.5580865  1.00878806 2.87177879]
----> || Optimized QWK SCORE :: [36m[1m 0.478[0m
Val score sub2 with best parameters: 0.4783086216364635


# Submission 2

In [30]:
# Fixed hyper-parameters for the submission 2 ensemble.
# LightGBM runs on CPU; XGBoost ('gpu_hist') and CatBoost ('GPU') are configured for GPU.
Params = {
    'learning_rate': 0.046,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  
    'lambda_l2': 0.01, 
    'device': 'cpu'
}

XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  
    'reg_lambda': 5, 
    'random_state': SEED,
    'tree_method': 'gpu_hist',
}

CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 200,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': 10,
    'task_type': 'GPU'
}

In [31]:
# Weighted ensemble for submission 2: three boosters plus TabNet (CatBoost weighted highest).
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)
TabNet_Model = TabNetWrapper(**TabNet_Params) 
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model),
    ('tabnet', TabNet_Model)
],weights=[4.0,4.0,5.0,4.0])

In [32]:
# Submission 2 data: tabular features left-joined with the autoencoded time series.
train_sub2 = pd.merge(train, train_ts_encoded, how="left", on='id')
test_sub2 = pd.merge(test, test_ts_encoded, how="left", on='id')

# KNN-impute the numeric columns of the training frame.
# numeric_cols comes from `train` (not from the merged frame), so the merged Enc_* columns
# are not imputed; the loop below copies them back unchanged.
imputer = KNNImputer(n_neighbors=5)
numeric_cols = train.select_dtypes(include=['float64', 'int64']).columns
imputed_data = imputer.fit_transform(train_sub2[numeric_cols])
train_imputed = pd.DataFrame(imputed_data, columns=numeric_cols)
# The target 'sii' is among the imputed columns: missing labels are filled in by KNN and
# rounded to an integer class.
train_imputed['sii'] = train_imputed['sii'].round().astype(int)

# Copy every column that was not part of the imputation (ids, seasons, Enc_* ...).
for col in train_sub2.columns:
    if col not in numeric_cols:
        train_imputed[col] = train_sub2[col]
        
train_sub2 = train_imputed

train_sub2 = feature_engineering(train_sub2)
test_sub2 = feature_engineering(test_sub2)

train_sub2 = train_sub2.drop('id', axis=1)
test_sub2  = test_sub2.drop('id', axis=1)

features_sub2 = noseason_features + time_encoded_cols

# NOTE(review): 'sii' was imputed above, so this dropna presumably removes nothing - confirm.
train_sub2 = train_sub2.dropna(subset='sii')

In [33]:
# Ratio features can produce +/-inf; turn them into NaN so they are handled like any other
# missing value. replace() is a no-op when no infinities are present, so no isinf() guard
# is needed (np.isinf on a whole frame fails on non-numeric columns). The same cleaning is
# applied to the test frame, which was previously left untouched.
train_sub2 = train_sub2.replace([np.inf, -np.inf], np.nan)
test_sub2 = test_sub2.replace([np.inf, -np.inf], np.nan)

X_sub2 = train_sub2[features_sub2]
y_sub2 = train_sub2['sii']
test_sub2 = test_sub2[features_sub2]

In [34]:
# submission2, _, _, _, _ = TrainML(voting_model, X_sub2, y_sub2, test_sub2)

In [35]:
# submission2.to_csv("submission2.csv", index=False)

# Submission 3

In [36]:
# Submission 3 data: tabular features left-joined with the raw (non-encoded) time-series
# features, with the identifier removed afterwards.
train_sub3 = pd.merge(train, train_ts, how="left", on='id')
test_sub3 = pd.merge(test, test_ts, how="left", on='id')

train_sub3 = train_sub3.drop('id', axis=1)
test_sub3 = test_sub3.drop('id', axis=1) 

In [37]:
# Submission 3 features: all non-id test columns plus the raw time-series columns.
features_sub3 = total_features + time_series_cols

In [38]:
# Keep only the rows that have a target label.
train_sub3 = train_sub3.dropna(subset='sii')

In [39]:
train_sub3 = update(train_sub3)
test_sub3 = update(test_sub3)

# Replace each season column by integer codes.
# NOTE(review): the mapping is built separately for train and test. If create_mapping
# derives codes from the values present in the frame it is given, the same season may get
# different codes in train and test - confirm against create_mapping.
for col in cat_c:
    mapping = create_mapping(col, train_sub3)
    mappingTe = create_mapping(col, test_sub3)
    
    train_sub3[col] = train_sub3[col].replace(mapping).astype(int)
    test_sub3[col] = test_sub3[col].replace(mappingTe).astype(int)


In [40]:
# Select the submission 3 feature columns; test_sub3 is narrowed to the same columns.
X_sub3 = train_sub3[features_sub3]
y_sub3 = train_sub3['sii']
test_sub3 = test_sub3[features_sub3]

In [41]:
def objective_sub3(trial):
    """Optuna objective for submission 3.

    Samples CatBoost, XGBoost and LightGBM hyper-parameters from ``trial``, combines the
    three GPU-configured boosters in an unweighted ``VotingRegressor`` and evaluates it
    with ``TrainML`` on the notebook globals ``X_sub3``, ``y_sub3`` and ``test_sub3``.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        The validation score reported by ``TrainML`` (second element of its result).
    """
    CatBoost_Params = {
        'learning_rate': trial.suggest_float('catboost_learning_rate', 1e-3, 0.3, log=True),
        'depth': trial.suggest_int('catboost_depth', 4, 10),
        'iterations': 200,
        'random_seed': SEED,
        'verbose': 0,
        'l2_leaf_reg': trial.suggest_float('catboost_l2_leaf_reg', 0.01, 10.0, log=True),
        'task_type': 'GPU',  
        'devices': '0',
        'cat_features': cat_c
    }
    XGB_Params = {
        # This parameter used to be registered as 'xgb_max_depth' as well. Optuna keeps
        # only the first draw for a repeated name, so max_depth received the
        # n_estimators value (200-1000). Each parameter now has its own name.
        'n_estimators': trial.suggest_int('xgb_n_estimators', 200, 1000),
        'max_depth': trial.suggest_int('xgb_max_depth', 1, 10),
        'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.1, log=True),
        'subsample': trial.suggest_float('xgb_subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.05, 1.0),
        'gamma': trial.suggest_float('xgb_gamma', 1e-2, 1.0),
        'min_child_weight': trial.suggest_int('xgb_min_child_weight', 1, 100),
        'eval_metric': 'rmse',
        'objective': 'reg:squarederror',
        'tree_method': 'gpu_hist',
        'predictor': 'gpu_predictor',
        'gpu_id': 0,
        'random_state': SEED
    }
    Params = {
        'max_depth': trial.suggest_int('lightgbm_max_depth', 1, 12),
        'min_data_in_leaf': trial.suggest_int('lightgbm_min_data_in_leaf', 1, 100),
        'num_leaves': trial.suggest_int('lightgbm_num_leaves', 8, 500),
        'learning_rate': trial.suggest_float('lightgbm_learning_rate', 0.01, 0.1),
        'feature_fraction': 0.893,
        'bagging_fraction': 0.784,
        'bagging_freq': 4,
        'lambda_l1': 10,
        'lambda_l2': 0.01,
        'device_type': 'gpu',
        'gpu_device_id': 0, 
        'verbosity': -1
    }
    LightGBM_Model = LGBMRegressor(**Params)    
    XGB_Model = XGBRegressor(**XGB_Params)
    CatBoost_Model = CatBoostRegressor(**CatBoost_Params)
    voting_model = VotingRegressor(estimators=[
        ('lightgbm', LightGBM_Model),
        ('xgboost', XGB_Model),
        ('catboost', CatBoost_Model),
    ])

    # Only the validation score matters to the study; the other outputs are discarded.
    _, val_score, _, _, _ = TrainML(voting_model, X_sub3, y_sub3, test_sub3)
    return val_score

In [42]:
# study_sub3 = optuna.create_study(direction='maximize')
# study_sub3.optimize(objective_sub3, n_trials=2)

In [43]:
# best_params_sub3 = study_sub3.best_params
# print("Best hyperparameters:", best_params_sub3)
'''
CatBoost_Best_Params = {
    'learning_rate': best_params_sub2['catboost_learning_rate'],
    'depth': best_params_sub2['catboost_depth'],
    'iterations': 200,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': best_params_sub2['catboost_l2_leaf_reg'],
    'task_type': 'GPU',  
    'devices': '0',
    'cat_features': cat_c
}

XGB_Best_Params = {
    'n_estimators': 1000,
    'max_depth': best_params_sub2['xgb_max_depth'],
    'learning_rate': best_params_sub2['xgb_learning_rate'],
    'subsample': best_params_sub2['xgb_subsample'],
    'colsample_bytree': best_params_sub2['xgb_colsample_bytree'],
    'gamma': best_params_sub2['xgb_gamma'],
    'min_child_weight': best_params_sub2['xgb_min_child_weight'],
    'eval_metric': 'rmse',
    'objective': 'reg:squarederror',
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
    'gpu_id': 0,
    'random_state': SEED,
}

LightGBM_Best_Params = {
    'max_depth': best_params_sub2['lightgbm_max_depth'],
    'min_data_in_leaf': best_params_sub2['lightgbm_min_data_in_leaf'],
    'num_leaves': best_params_sub2['lightgbm_num_leaves'],
    'learning_rate': best_params_sub2['lightgbm_learning_rate'],
    'feature_fraction': best_params_sub2['lightgbm_feature_fraction'],
    'bagging_fraction': best_params_sub2['bagging_fraction'],
    'bagging_freq': best_params_sub2['bagging_freq'],
    'lambda_l1': best_params_sub2['lambda_l1'],
    'lambda_l2': best_params_sub2['lambda_l2'],
    'device_type': 'gpu',
    'gpu_device_id': 0,
    'verbosity': -1
} '''
# The triple-quoted block above is an inert string, so the *_Best_Params dictionaries used
# here are the ones defined earlier for submission 1.
Light = LGBMRegressor(**LightGBM_Best_Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Best_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Best_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])
# NOTE(review): X and y are not used in this cell; confirm whether any later cell needs them.
X = train.drop(['sii'], axis=1)
y = train['sii']

In [44]:
# submission3, val_score_sub3, _, _, _ = TrainML(voting_model, X_sub3, y_sub3, test_sub3)
# print("Val score sub3 with best parameters:", val_score_sub3)
# submission3.to_csv("submission3.csv", index=False)

# Submission 4

In [45]:
# Submission 4: five default-configured regressors, each preceded by median imputation,
# averaged by an unweighted VotingRegressor.
imputer = SimpleImputer(strategy='median')

ensemble = VotingRegressor(estimators=[
    (label, Pipeline(steps=[('imputer', imputer), ('regressor', base_model)]))
    for label, base_model in (
        ('lgb', LGBMRegressor(random_state=SEED)),
        ('xgb', XGBRegressor(random_state=SEED)),
        ('cat', CatBoostRegressor(random_state=SEED, silent=True)),
        ('rf', RandomForestRegressor(random_state=SEED)),
        ('gb', GradientBoostingRegressor(random_state=SEED)),
    )
])

# submission4, val_score_sub4, _, _, _= TrainML(ensemble, X_sub3, y_sub3, test_sub3)

# Submission 5: MAE

In [46]:
# Submission 5 (masked autoencoder): rebuild train/test from the CSV files, merge the raw
# time-series features and turn everything into float tensors.
# NOTE: this cell overwrites the notebook globals `train`, `test`, `cat_c` and
# `time_series_cols`; cells above it see the new values if they are re-run afterwards.
time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")
featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii',
               
                #  'PCIAT-Season', 'PCIAT-PCIAT_01', 'PCIAT-PCIAT_02', 'PCIAT-PCIAT_03', 'PCIAT-PCIAT_04',
                # 'PCIAT-PCIAT_05', 'PCIAT-PCIAT_06', 'PCIAT-PCIAT_07', 'PCIAT-PCIAT_08',
                # 'PCIAT-PCIAT_09', 'PCIAT-PCIAT_10', 'PCIAT-PCIAT_11', 'PCIAT-PCIAT_12',
                # 'PCIAT-PCIAT_13', 'PCIAT-PCIAT_14', 'PCIAT-PCIAT_15', 'PCIAT-PCIAT_16',
                # 'PCIAT-PCIAT_17', 'PCIAT-PCIAT_18', 'PCIAT-PCIAT_19', 'PCIAT-PCIAT_20', 'PCIAT-PCIAT_Total',
]
cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season',
        # 'PCIAT-Season',
        ]
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
train = pd.merge(train, train_ts, how="left", on='id')
test = pd.merge(test, test_ts, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

featuresCols += time_series_cols

train = train[featuresCols]
train = update(train)
test = update(test)
# NOTE(review): the integer mapping is built separately for train and test; if
# create_mapping derives codes from the values present in the frame it is given, the same
# season may get different codes in the two frames - confirm against create_mapping.
for col in cat_c:
    if col in train.columns:
        mapping = create_mapping(col, train)
        train[col] = train[col].replace(mapping).astype(int)
    if col in test.columns:
        mappingTe = create_mapping(col, test)
        test[col] = test[col].replace(mappingTe).astype(int)


# Train and test rows are stacked so the imputer can learn from both; test rows have NaN
# in every column they lack (notably 'sii').
full_df = pd.concat([train,test])
X_raw = torch.tensor(full_df.to_numpy()).float()
# (A leftover debugging expression, `random_extend(X_raw, 5000).shape`, was removed here:
# its result was discarded and NumPy is re-seeded before the next random draw.)

# Give the test frame exactly the columns (and column order) of full_df.
for c in full_df.columns:
    if c not in test.columns:
        test[c] = float('nan')

test = test[full_df.columns]

X_tensor_test = torch.tensor(test.to_numpy()).float()

In [47]:
from torch.utils.data import DataLoader, RandomSampler


In [48]:
# Training configuration for the ReMasker imputer.
max_epochs = 100
num_folds = 2
# Warm up for a tenth of the epochs, but for at least one epoch.
warmup_epochs = max(1, max_epochs // 10)
imputer_args = Namespace(
    batch_size=64,
    max_epochs= max_epochs,
    accum_iter=1,
    mask_ratio=0.38,
    embed_dim=16,
    depth=6,
    decoder_depth=4,
    num_heads=4,
    mlp_ratio=4.0,
    encode_func='linear',
    norm_field_loss=False,
    weight_decay=0.05,
    lr=None, blr=0.001,
    min_lr=1e-05,
    warmup_epochs=warmup_epochs,
    device='cuda', seed=SEED, overwrite=True, pin_mem=True
)


# NOTE: SEED is reassigned AFTER imputer_args captured the previous value, so
# imputer_args.seed and the global SEED differ from here on. Re-running earlier cells
# after this one picks up the new SEED.
SEED = random.randint(1, int(2e9))
np.random.seed(SEED)
# Shuffle the rows of the stacked train+test tensor.
indices = np.random.permutation(len(X_raw))
X_raw = X_raw[indices]
# submission5 = PerformImpute(imputer_args)
# submission5

# Final

In [49]:
"""sub1 = submission1
sub2 = submission2
sub3 = submission3
sub4 = submission4
sub5 = submission5

sub1 = sub1.sort_values(by='id').reset_index(drop=True)
sub2 = sub2.sort_values(by='id').reset_index(drop=True)
sub3 = sub3.sort_values(by='id').reset_index(drop=True)
sub4 = sub4.sort_values(by='id').reset_index(drop=True)
sub5 = sub5.sort_values(by='id').reset_index(drop=True)


combined = pd.DataFrame({
    'id': sub1['id'],
    'sii_1': sub1['sii'],
    'sii_2': sub2['sii'],
    'sii_3': sub3['sii'],
    'sii_4': sub4['sii'],
    'sii_5': sub5['sii'],
})

def majority_vote(row):
    return row.mode()[0]

combined['final_sii'] = combined[['sii_1', 'sii_2', 'sii_3', 'sii_4']].apply(majority_vote, axis=1)

final_submission = combined[['id', 'final_sii']].rename(columns={'final_sii': 'sii'})
"""
# The majority-vote block above is an inert string; only submission 1 is written out.
final_submission = submission1
final_submission.to_csv('submission.csv', index=False)

# The message previously announced majority voting and a file name that is never written.
print("Submission 1 predictions saved to 'submission.csv'")

Majority voting completed and saved to 'Final_Submission.csv'


In [50]:
# Display the frame that was written to submission.csv.
final_submission

Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,1
3,00115b9f,0
4,0016bb22,2
5,001f3379,1
6,0038ba98,1
7,0068a485,0
8,0069fbed,2
9,0083e397,2
