In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import torch

from colorama import Fore, Style
from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

In [2]:
import random
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

SEED = 314159
seed_everything(SEED)

n_splits = 5

In [3]:
def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df
    
train_ts = load_time_series("../input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("../input/child-mind-institute-problematic-internet-use/series_test.parquet")

100%|██████████| 996/996 [01:17<00:00, 12.83it/s]
100%|██████████| 2/2 [00:00<00:00,  8.95it/s]


In [4]:
train = pd.read_csv('../input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('../input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('../input/child-mind-institute-problematic-internet-use/sample_submission.csv')


featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii',
               
                 'PCIAT-Season', 'PCIAT-PCIAT_01', 'PCIAT-PCIAT_02', 'PCIAT-PCIAT_03', 'PCIAT-PCIAT_04',
                'PCIAT-PCIAT_05', 'PCIAT-PCIAT_06', 'PCIAT-PCIAT_07', 'PCIAT-PCIAT_08',
                'PCIAT-PCIAT_09', 'PCIAT-PCIAT_10', 'PCIAT-PCIAT_11', 'PCIAT-PCIAT_12',
                'PCIAT-PCIAT_13', 'PCIAT-PCIAT_14', 'PCIAT-PCIAT_15', 'PCIAT-PCIAT_16',
                'PCIAT-PCIAT_17', 'PCIAT-PCIAT_18', 'PCIAT-PCIAT_19', 'PCIAT-PCIAT_20', 'PCIAT-PCIAT_Total',
]

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season',
        'PCIAT-Season',
        ]


time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")

train = pd.merge(train, train_ts, how="left", on='id')
test = pd.merge(test, test_ts, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

featuresCols += time_series_cols

train = train[featuresCols]
# train = train.dropna(subset='sii')

def update(df):
    global cat_c
    for c in cat_c: 
        if c not in df.columns: continue
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df

train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    if col in train.columns:
        if 'Season' in col:
            mapping = {
                'Missing': float('nan'),
                'Spring': 0.,
                'Summer': 1.,
                'Fall': 2.,
                'Winter': 3.,
            }
        else:
            mapping = create_mapping(col, train)
            print(f'{col}: {mapping}')
        train[col] = train[col].replace(mapping)
    if col in test.columns:
        if 'Season' in col:
            mappingTe = {
                'Missing': float('nan'),
                'Spring': 0.,
                'Summer': 1.,
                'Fall': 2.,
                'Winter': 3.,
            }
        else:
            mappingTe = create_mapping(col, test)
            print(f'{col}: {mappingTe}')
            
        test[col] = test[col].replace(mappingTe)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)


In [5]:
# current implementation: only support numerical values

from functools import partial
from tkinter import E

import torch
import numpy as np
import torch.nn as nn
import pandas as pd
from timm.models.vision_transformer import Block

# current implementation: only support numerical values
import numpy as np
import torch, os
from torch import nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import math
import argparse
import random

class MaskEmbed(nn.Module):
    """ record to mask embedding
    """
    def __init__(self, rec_len=25, embed_dim=64, norm_layer=None):
        
        super().__init__()
        self.rec_len = rec_len
        self.proj = nn.Conv1d(1, embed_dim, kernel_size=1, stride=1)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        B, _, L = x.shape
        # assert(L == self.rec_len, f"Input data width ({L}) doesn't match model ({self.rec_len}).")
        x = self.proj(x)
        x = x.transpose(1, 2)
        x = self.norm(x)
        return x


class ActiveEmbed(nn.Module):
    """ record to mask embedding
    """
    def __init__(self, rec_len=25, embed_dim=64, norm_layer=None):
        
        super().__init__()
        self.rec_len = rec_len
        self.proj = nn.Conv1d(1, embed_dim, kernel_size=1, stride=1)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        B, _, L = x.shape
        # assert(L == self.rec_len, f"Input data width ({L}) doesn't match model ({self.rec_len}).")
        x = self.proj(x)
        x = torch.sin(x)
        x = x.transpose(1, 2)
        #   x = torch.cat((torch.sin(x), torch.cos(x + math.pi/2)), -1)
        x = self.norm(x)
        return x



def get_1d_sincos_pos_embed(embed_dim, pos, cls_token=False):
    """
    embed_dim: output dimension for each position
    pos: a list of positions to be encoded: size (M,)
    out: (M, D)
    """

    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float32)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    pos = np.arange(pos)  # (M,)
    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out) # (M, D/2)
    emb_cos = np.cos(out) # (M, D/2)

    pos_embed = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)

    if cls_token:
        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)

    return pos_embed


def adjust_learning_rate(optimizer, epoch, lr, min_lr, max_epochs, warmup_epochs):
    """Decay the learning rate with half-cycle cosine after warmup"""
    if epoch < warmup_epochs:
        tmp_lr = lr * epoch / warmup_epochs 
    else:
        tmp_lr = min_lr + (lr - min_lr) * 0.5 * \
            (1. + math.cos(math.pi * (epoch - warmup_epochs) / (max_epochs - warmup_epochs)))
    for param_group in optimizer.param_groups:
        if "lr_scale" in param_group:
            param_group["lr"] = tmp_lr * param_group["lr_scale"]
        else:
            param_group["lr"] = tmp_lr
    return tmp_lr


def get_grad_norm_(parameters, norm_type: float = 2.0) -> torch.Tensor:
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if len(parameters) == 0:
        return torch.tensor(0.)
    device = parameters[0].grad.device
    if norm_type == np.inf:
        total_norm = max(p.grad.detach().abs().max().to(device) for p in parameters)
    else:
        total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type)
    return total_norm


class NativeScaler:

    state_dict_key = "amp_scaler"
    def __init__(self):
        self._scaler = torch.cuda.amp.GradScaler()

    def __call__(self, loss, optimizer, clip_grad=None, parameters=None, create_graph=False, update_grad=True):
        self._scaler.scale(loss).backward(create_graph=create_graph)
        if update_grad:
            if clip_grad is not None:
                assert parameters is not None
                self._scaler.unscale_(optimizer)  # unscale the gradients of optimizer's assigned params in-place
                norm = torch.nn.utils.clip_grad_norm_(parameters, clip_grad)
            else:
                self._scaler.unscale_(optimizer)
                norm = get_grad_norm_(parameters)
            self._scaler.step(optimizer)
            self._scaler.update()
        else:
            norm = None
        return norm

    def state_dict(self):
        return self._scaler.state_dict()

    def load_state_dict(self, state_dict):
        self._scaler.load_state_dict(state_dict)



class MAEDataset(Dataset):

    def __init__(self, X, M):        
         self.X = X
         self.M = M

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx: int):
        return self.X[idx], self.M[idx]



def get_dataset(dataset : str, path : str):

    if dataset in ['climate', 'compression', 'wine', 'yacht', 'spam', 'letter', 'credit', 'raisin', 'bike', 'obesity', 'airfoil', 'blood', 'yeast', 'health', 'review', 'travel']:
        df = pd.read_csv(os.path.join(path, 'data', dataset + '.csv'))
        last_col = df.columns[-1]
        y = df[last_col]
        X = df.drop(columns=[last_col])
    elif dataset == 'california':
        from sklearn.datasets import fetch_california_housing
        X, y = fetch_california_housing(as_frame=True, return_X_y=True)
    elif dataset == 'diabetes':
        from sklearn.datasets import load_diabetes
        X, y = load_diabetes(as_frame=True, return_X_y=True)
    elif dataset == 'iris':
        # only for testing
        from sklearn.datasets import load_iris
        X, y = load_iris(as_frame=True, return_X_y=True)
    

    return X, y


eps = 1e-6
import torch
import torch.nn as nn
import numpy as np
from torch.nn import TransformerEncoderLayer
from transformers.models.bert.modeling_bert import BertPooler

class MaskedAutoencoder(nn.Module):
    def __init__(self, rec_len=25, embed_dim=64, depth=4, num_heads=4,
                 decoder_embed_dim=64, decoder_depth=2, decoder_num_heads=4,
                 mlp_ratio=4., cls_mlp_dim=64, norm_layer=nn.LayerNorm, norm_field_loss=False,
                 encode_func='linear', dropout=0.0):
        super().__init__()
        
        self.rec_len = rec_len
        self.embed_dim = embed_dim
        self.norm_field_loss = norm_field_loss
        
        # Encoder
        if encode_func == 'active':
            self.mask_embed = ActiveEmbed(rec_len, embed_dim, norm_layer)
        else:
            self.mask_embed = MaskEmbed(rec_len, embed_dim, norm_layer)
        
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, rec_len + 1, embed_dim), requires_grad=False)
        
        encoder_layer = TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=int(embed_dim * mlp_ratio),
                        dropout=dropout, batch_first=True)
        self.blocks = nn.TransformerEncoder(encoder_layer, depth)
        self.norm = norm_layer(embed_dim)
        
        
        self.enc_pooler = nn.ModuleList([
            nn.Sequential(
                nn.Linear(embed_dim, 1),
            ),
            nn.Sequential(
                nn.Linear((rec_len + 1) * 1, cls_mlp_dim), nn.ReLU(),
            ),
        ])
        self.enc_lbl_pred = nn.Sequential(
            nn.Linear(cls_mlp_dim, cls_mlp_dim), nn.ReLU(),
            # nn.Linear(cls_mlp_dim, cls_mlp_dim), nn.ReLU(),
            # nn.Dropout(dropout),
            nn.Linear(cls_mlp_dim, cls_mlp_dim), nn.Sigmoid(),
        )
        
        # Decoder
        self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
        self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_embed_dim))
        self.decoder_pos_embed = nn.Parameter(torch.zeros(1, rec_len + 1, decoder_embed_dim), requires_grad=False)
        
        decoder_layer = TransformerEncoderLayer(d_model=decoder_embed_dim, nhead=decoder_num_heads, dim_feedforward=int(decoder_embed_dim * mlp_ratio),
                        dropout=dropout, batch_first=True)
        self.decoder_blocks = nn.TransformerEncoder(decoder_layer, decoder_depth)
        self.decoder_norm = norm_layer(decoder_embed_dim)
        self.decoder_pred = nn.Linear(decoder_embed_dim, 1, bias=True)
        
        
        self.initialize_weights()

    def initialize_weights(self):
        pos_embed = get_1d_sincos_pos_embed(self.pos_embed.shape[-1], self.rec_len, cls_token=True)
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))
        
        decoder_pos_embed = get_1d_sincos_pos_embed(self.decoder_pos_embed.shape[-1], self.rec_len, cls_token=True)
        self.decoder_pos_embed.data.copy_(torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))
        
        torch.nn.init.xavier_uniform_(self.mask_embed.proj.weight.view([self.mask_embed.proj.weight.shape[0], -1]))
        torch.nn.init.normal_(self.cls_token, std=.02)
        torch.nn.init.normal_(self.mask_token, std=.02)
        
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def random_masking(self, x, m, mask_ratio, training=None):
        N, L, D = x.shape
        if training is None:
            training = self.training
        if training:
            len_keep = int(L * (1 - mask_ratio))
            noise = torch.rand(N, L, device=x.device)
            noise[m < 1e-6] = 1
            ids_shuffle = torch.argsort(noise, dim=1)
            ids_restore = torch.argsort(ids_shuffle, dim=1)
            ids_keep = ids_shuffle[:, :len_keep]
            mask = torch.ones([N, L], device=x.device)
            mask[:, :len_keep] = 0
            mask = torch.gather(mask, dim=1, index=ids_restore)
            mask = torch.logical_or(mask, ~m.bool())
            nask = ~mask
            return mask, nask
        else:
            mask = ~m.bool()
            nask = m.bool()
            return mask, nask

    def forward_encoder(self, x, m, mask_ratio=0.5, training=None):
        x = self.mask_embed(x)
        x = x + self.pos_embed[:, 1:, :]
        mask, nask = self.random_masking(x, m, mask_ratio, training)
        x = x * (~mask.unsqueeze(-1)).float()
        cls_token = self.cls_token + self.pos_embed[:, :1, :]
        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        attn_mask = torch.cat((torch.zeros(x.shape[0], 1, device=x.device), mask), dim=1)
        x = self.blocks(src=x, src_key_padding_mask=attn_mask.bool())
        x = self.norm(x)
        return x, mask, nask

    def forward_decoder(self, x, mask):
        x = self.decoder_embed(x)
        x = x + self.decoder_pos_embed
        mask_with_cls = torch.cat((torch.zeros(x.shape[0], 1, device=x.device), mask), dim=1)
        x = self.blocks(src=x, src_key_padding_mask=mask_with_cls.bool())
        
        x = self.decoder_norm(x)
        x = self.decoder_pred(x)
        x = x[:, 1:, :].sigmoid()
        return x

    def forward_loss(self, data, pred, m, mask, nask):
        target = data.squeeze(dim=1)
        # if self.norm_field_loss:
        #     mean = target.mean(dim=-1, keepdim=True)
        #     var = target.var(dim=-1, keepdim=True)
        #     target = (target - mean) / (var + 1e-6) ** 0.5
        rec_mask = mask * m
        loss = (pred.squeeze(dim=2) - target) ** 2
        loss = (loss * rec_mask).sum() / (rec_mask.sum() + 1e-6) + (loss * nask).sum() / (nask.sum() + 1e-6)
        return loss

    def forward(self, data, m):
        x, _, _ = self.forward_encoder(data, m, 0.0, False)
        B = x.shape[0]
        h = self.enc_pooler[1](self.enc_pooler[0](x).reshape(B, -1))
        enc_pred = self.enc_lbl_pred(h)[:, 0]
        return enc_pred

    def forward_selfsl(self, data, m, mask_ratio=0.5, training=None):
        x, mask, nask = self.forward_encoder(data, m, mask_ratio, training)
        pred = self.forward_decoder(x, mask)
        loss = self.forward_loss(data, pred, m, mask, nask)
        return loss, (loss.item(), )

    def forward_sl(self, data, m, lbl_cols):
        num_lbls = len(lbl_cols)
        lbl_mask = m[:, lbl_cols]
        ft_mask = m.clone()
        ft_mask[:, lbl_cols] = 0
        ft = data.clone()
        ft[:, :, lbl_cols] = 0
        x, _, _ = self.forward_encoder(ft, ft_mask, 0.0, False)
        B = x.shape[0]
        h = self.enc_pooler[1](self.enc_pooler[0](x).reshape(B, -1))
        enc_pred = self.enc_lbl_pred(h)[:, :num_lbls]

        tgt = data[:, 0, lbl_cols]
        enc_loss = (((enc_pred - tgt) ** 2) * lbl_mask).sum() / (lbl_mask.sum() + 1e-6)
        
        loss = enc_loss
        
        return loss, (enc_loss.item(), )
      
    def forward_semisl(self, data, m, lbl_cols, ema_model=None, hard=False):
        num_lbls = len(lbl_cols)
        lbl_mask = m[:, lbl_cols]
        nlbl_mask = 1 - m[:, lbl_cols]
        ft_mask = m.clone()
        ft_mask[:, lbl_cols] = 0
        ft = data.clone()
        ft[:, :, lbl_cols] = 0
        
        noise = torch.randn_like(ft)
        noise_norm = torch.norm(noise, p=2, dim=-1, keepdim=True)
        noise = noise / (noise_norm + 1e-8)
        noise = noise * 0.0
        if not hard: noise *= 0
        
        x, _, _ = self.forward_encoder(torch.clamp(ft + noise, min=0.0, max=1.0), ft_mask, 0.0, False)
        B = x.shape[0]
        h = self.enc_pooler[1](self.enc_pooler[0](x).reshape(B, -1))
        enc_pred = self.enc_lbl_pred(h)[:, :num_lbls]
        
        if ema_model is None: raise NotImplementedError()
        # with torch.no_grad():
        #     x_tgt, _, _ = ema_model.forward_encoder(ft, ft_mask, 0.0, False)
            
        #     B = x.shape[0]
        #     h_tgt = ema_model.enc_pooler[1](ema_model.enc_pooler[0](x_tgt).reshape(B, -1))
        #     semisl_tgt = ema_model.enc_lbl_pred(h_tgt)[:, :num_lbls].detach()
        #     if hard:
        #         semisl_tgt[:, 0] = (semisl_tgt[:, 0] * 3.).round() / 3.
        #         semisl_weight = 1.0
        #     else:
        #         semisl_weight = 0.1
        
        
        # semisl_loss = (((enc_pred - semisl_tgt) ** 2) * nlbl_mask).sum() / (nlbl_mask.sum() + 1e-6)

        
        with torch.no_grad():
            if hard:
                x_tgt, _, _ = ema_model.forward_encoder(ft, ft_mask, 0.0, False)
                
                B = x_tgt.shape[0]
                h_tgt = ema_model.enc_pooler[1](ema_model.enc_pooler[0](x_tgt).reshape(B, -1))
                semisl_tgt = ema_model.enc_lbl_pred(h_tgt)[:, :num_lbls].detach()
                semisl_tgt[:, 0] = (semisl_tgt[:, 0] * 3.).round() / 3.
                semisl_weight = 1.0
                semisl_loss = (
                    0.5 * (((enc_pred - semisl_tgt) ** 2) * nlbl_mask).sum() / (nlbl_mask.sum() + 1e-6) # all labels
                    + 0.5 * (((enc_pred[:, 0] - semisl_tgt[:, 0]) ** 2) * nlbl_mask[:, 0]).sum() / (nlbl_mask[:, 0].sum() + 1e-6) # sii only
                )
            else:
                semisl_weight = 0.0
                semisl_loss = torch.tensor([0.0]).to(x.device)
                

        

        sl_tgt = data[:, 0, lbl_cols]
        sl_loss = (((enc_pred - sl_tgt) ** 2) * lbl_mask).sum() / (lbl_mask.sum() + 1e-6)
        
        loss = 1.0 * sl_loss + semisl_weight * semisl_loss
        return loss, (sl_loss.item(), semisl_loss.item(),)
      
      

In [6]:
# stdlib
from typing import Any, List, Tuple, Union

# third party
import numpy as np
import math, sys, argparse
import pandas as pd
import torch
from torch import nn
from functools import partial
import time, os, json
from torch.utils.data import DataLoader, RandomSampler
import sys
import timm.optim.optim_factory as optim_factory
import torch.nn.functional as F
import copy


def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')



eps = 1e-8
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from argparse import Namespace
remasker_args = Namespace(
    batch_size=64,
    max_epochs= 600,
    accum_iter=1,
    mask_ratio=0.5,
    embed_dim=32,
    depth=6,
    decoder_depth=4,
    num_heads=4,
    mlp_ratio=4.0,
    encode_func='linear',
    norm_field_loss=False,
    weight_decay=0.05,
    lr=None, blr=0.001,
    min_lr=1e-05,
    warmup_epochs=40,
    device='cuda', seed=SEED, overwrite=True, pin_mem=True
)



def set_dropout_p(m, p):
    if isinstance(m, nn.Dropout):
        m.p = p

def update_ema_variables(model, ema_model, alpha, global_step, max_global_step):
    # Use the true average until the exponential average is more correct
    def f(alpha, t, T):
        A = 1
        B = alpha * T / (T - 1)
        return (B * (1 - A / (t + 1)))
    current_alpha = f(alpha, global_step, max_global_step)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(current_alpha).add_(1 - current_alpha, param.data)


class ReMasker:

    def __init__(self, args=remasker_args):

        self.batch_size = args.batch_size
        self.accum_iter = args.accum_iter
        self.min_lr = args.min_lr
        self.norm_field_loss = args.norm_field_loss
        # self.weight_decay = args.weight_decay
        self.lr = args.lr
        self.blr = args.blr
        self.warmup_epochs = max(1, args.max_epochs // 10)
        self.ema_decay = args.ema_decay
        self.model = None
        self.norm_parameters = None

        self.embed_dim = args.embed_dim
        self.depth = args.depth
        self.decoder_depth = args.decoder_depth
        self.num_heads = args.num_heads
        self.mlp_ratio = args.mlp_ratio
        self.cls_mlp_dim = args.cls_mlp_dim
        self.max_epochs = args.max_epochs
        self.mask_ratio = args.mask_ratio
        self.encode_func = args.encode_func
        self.dropout = args.dropout


    def fit(self, X_raw: pd.DataFrame, X_val=None, lbl_cols=None, model=None):
        global dbg_var
        X = X_raw.clone()

        # Parameters
        no = len(X)
        dim = len(X[0, :])

        X = X.cpu()

        min_val = np.zeros(dim)
        max_val = np.zeros(dim)

        for i in range(dim):
            min_val[i] = np.nanmin(X[:, i])
            max_val[i] = np.nanmax(X[:, i])
            X[:, i] = (X[:, i] - min_val[i]) / (max_val[i] - min_val[i] + eps)

        self.norm_parameters = {"min": min_val, "max": max_val}

        # Set missing
        M = 1 - (1 * (np.isnan(X)))
        M = M.float().to(device)

        X = torch.nan_to_num(X)
        X = X.to(device)

        if model is None:
            self.model = MaskedAutoencoder(
                rec_len=dim,
                embed_dim=self.embed_dim,
                depth=self.depth,
                num_heads=self.num_heads,
                decoder_embed_dim=self.embed_dim,
                decoder_depth=self.decoder_depth,
                decoder_num_heads=self.num_heads,
                mlp_ratio=self.mlp_ratio,
                cls_mlp_dim=self.cls_mlp_dim,
                norm_layer=partial(nn.LayerNorm, eps=eps),
                norm_field_loss=self.norm_field_loss,
                encode_func=self.encode_func,
                dropout=self.dropout,
            )
        else:
            self.model = copy.deepcopy(model)
            for param in self.model.blocks.layers[:].parameters():
                param.detach_()
        self.ema_model = copy.deepcopy(self.model)
        self.ema_model.apply(lambda m: set_dropout_p(m, p=0.0))
        for param in self.ema_model.parameters():
            param.detach_()
        

        self.model.to(device)
        self.ema_model.to(device).eval()

        # set optimizers
        # param_groups = optim_factory.add_weight_decay(model, args.weight_decay)
        eff_batch_size = self.batch_size * self.accum_iter
        if self.lr is None:  # only base_lr is specified
            self.lr = self.blr * eff_batch_size / 64
        # param_groups = optim_factory.add_weight_decay(self.model, self.weight_decay)
        # optimizer = torch.optim.AdamW(param_groups, lr=self.lr, betas=(0.9, 0.95))
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, betas=(0.9, 0.95))
        loss_scaler = NativeScaler()

        dataset = MAEDataset(X, M)
        dataloader = DataLoader(
            dataset, sampler=RandomSampler(dataset),
            batch_size=self.batch_size,
        )


        best_loss = 1e9
        best_model = copy.deepcopy(self.model)
        for epoch in range(self.max_epochs):
            self.model.train()

            optimizer.zero_grad()
            total_loss = 0
            lbl_loss = 0.
            
            
            import time
            dbgt1 = 0
            dbgt2 = 0
            dbgt3 = 0

            iter = 0
            for iter, (samples, masks) in enumerate(dataloader):

                # we use a per iteration (instead of per epoch) lr scheduler
                if iter % self.accum_iter == 0:
                    adjust_learning_rate(optimizer, iter / len(dataloader) + epoch, self.lr, self.min_lr,
                                         self.max_epochs, self.warmup_epochs)

                samples = samples.unsqueeze(dim=1)
                samples = samples.to(device, non_blocking=True)
                masks = masks.to(device, non_blocking=True)

                # print(samples, masks)

                # with torch.cuda.amp.autocast():

                if lbl_cols is not None:
                    input_samples = samples.clone()
                    input_masks = masks.clone()
                    
                    
                    # selfsl_loss, dbg_selfsl_loss = self.model.forward_selfsl(samples, masks, mask_ratio=self.mask_ratio)
                    
                    # sl_loss, dbg_sl_loss = self.model.forward_sl(input_samples, input_masks, lbl_cols)
                    
                    
                    hard = True if (epoch >= (self.max_epochs // 2)) else False
                    semisl_loss, dbg_semisl_loss = self.model.forward_semisl(input_samples, input_masks, lbl_cols, ema_model=self.ema_model, hard=hard)
                    loss = 1.0 * semisl_loss
                        
                else:

                    selfsl_loss, dbg_selfsl_loss = self.model.forward_selfsl(samples, masks, mask_ratio=self.mask_ratio)
                    loss = selfsl_loss
                
                loss_value = loss.item()
                total_loss += loss_value
                if not math.isfinite(loss_value):
                    print("Loss is {}, stopping training".format(loss_value))
                    dbg_var = (samples, masks)
                    sys.exit(1)

                loss /= self.accum_iter
                loss_scaler(loss, optimizer, parameters=self.model.parameters(),
                            update_grad=(iter + 1) % self.accum_iter == 0)

                if (iter + 1) % self.accum_iter == 0:
                    optimizer.zero_grad()
                    
                
            update_ema_variables(self.model, self.ema_model, self.ema_decay, epoch, self.max_epochs)
            # print(dbgt1)
            # print(dbgt2)
            # print(dbgt3)
            total_loss = (total_loss / (iter + 1))
            self.model.eval()
            if X_val is not None:
                val_loss = self.evaluate(X_val, lbl_cols)
            else:
                val_loss = total_loss
            if val_loss <= best_loss:
                best_loss = val_loss
                best_model = copy.deepcopy(self.model)
            if (epoch + 1) % max(1, self.max_epochs // 10) == 0 or epoch == 0:
                lbl_loss = lbl_loss / (iter + 1)
                
                if lbl_cols is not None:
                    print("Epoch: %d, train;val;best qwk: %.4f;%.4f;%.4f, loss: %.4f, val_loss: %.4f" % 
                        (epoch+1, -self.evaluate(X_raw, lbl_cols), -val_loss, -best_loss, total_loss, val_loss)
                    )
                else:
                    print("Epoch: %d, loss: %.4f" % 
                        (epoch+1, -best_loss)
                    )
                    
                

        self.model = best_model
        print(f'Loaded best model with loss={best_loss:.4f}')
        # torch.save(self.model.state_dict(), self.path)
        return self
      
      
      
    def evaluate(self, X_raw: torch.Tensor, lbl_cols):
        keep_indices = torch.where(~X_raw[:, lbl_cols].isnan())[0]
        X_raw = X_raw[keep_indices]
        gt = X_raw[:, lbl_cols[0]].cpu().numpy().round(0).astype(int)
        X_raw[:, lbl_cols] = float('nan')
        yp = self.predict(X_raw, lbl_cols)
        yp = yp.cpu().numpy().round(0).astype(int)
        return -quadratic_weighted_kappa(gt, yp)
      
      
    def predict(self, X_raw: torch.Tensor, lbl_idx, bs=None):
        X_raw = torch.tensor(X_raw, dtype=torch.float32)
        
        # Normalize the input data
        min_val = self.norm_parameters["min"]
        max_val = self.norm_parameters["max"]
        X = X_raw.clone()
        for i in range(X.shape[1]):
            X[:, i] = (X[:, i] - min_val[i]) / (max_val[i] - min_val[i] + eps)
        
        M = (1 - (1 * torch.isnan(X))).float().to(device)
        
        X = torch.nan_to_num(X)
        X = X.to(device)
        
        if bs == None: bs = self.batch_size
        # Prepare DataLoader
        dataset = MAEDataset(X, M)
        dataloader = DataLoader(dataset, batch_size=bs, shuffle=False)
        
        # Ensure model is in evaluation mode
        self.model.eval()
        
        # Tensor to hold predictions
        predictions = torch.zeros(0).to(device)
        
        with torch.no_grad():
            for batch_samples, batch_masks in dataloader:
                # Prepare input for the model
                batch_samples = batch_samples.unsqueeze(dim=1).to(device)
                batch_masks = batch_masks.to(device)
                
                # Forward pass with training=False
                pred = self.model.forward(batch_samples, batch_masks)
                
                pred = pred.reshape(-1)
                
                predictions = torch.cat((predictions, pred), 0)
        
        return predictions * 3.


    # def transform(self, X_raw: torch.Tensor):

    #     X = X_raw.clone()

    #     min_val = self.norm_parameters["min"]
    #     max_val = self.norm_parameters["max"]

    #     no, dim = X.shape
    #     X = X.cpu()

    #     # MinMaxScaler normalization
    #     for i in range(dim):
    #         X[:, i] = (X[:, i] - min_val[i]) / (max_val[i] - min_val[i] + eps)

    #     # Set missing
    #     M = 1 - (1 * (np.isnan(X)))
    #     X = np.nan_to_num(X)

    #     X = torch.from_numpy(X).to(device)
    #     M = M.to(device)

    #     self.model.eval()

    #     bs = 64
    #     # Imputed data
    #     with torch.no_grad():
    #         for i in range(0, no, bs):
    #             sample = X[i:i+bs][:, None]
    #             mask = M[i:i+bs]
    #             _, pred, _, _ = self.model(sample, mask, 0.5, False)
    #             pred = pred[:, :, 0]
    #             if i == 0:
    #                 imputed_data = pred
    #             else:
    #                 imputed_data = torch.cat((imputed_data, pred), 0)
    #     print(imputed_data.shape)
                    
    #                 # Renormalize
    #     for i in range(dim):
    #         imputed_data[:, i] = imputed_data[:, i] * (max_val[i] - min_val[i] + eps) + min_val[i]


    #     if np.all(np.isnan(imputed_data.detach().cpu().numpy())):
    #         err = "The imputed result contains nan. This is a bug. Please report it on the issue tracker."
    #         raise RuntimeError(err)

    #     M = M.cpu()
    #     imputed_data = imputed_data.detach().cpu()
    #     # print('imputed', imputed_data, M)
    #     # print('imputed', M * np.nan_to_num(X_raw.cpu()) + (1 - M) * imputed_data)
    #     return imputed_data
    
    
    def fit_transform(self, X: torch.Tensor) -> torch.Tensor:
        """Imputes the provided dataset using the GAIN strategy.
        Args:
            X: np.ndarray
                A dataset with missing values.
        Returns:
            Xhat: The imputed dataset.
        """
        X = torch.tensor(X.values, dtype=torch.float32)
        return self.fit(X).transform(X).detach().cpu().numpy()

In [7]:
full_df = pd.concat([train,test])

In [8]:
X_raw = torch.tensor(full_df.to_numpy()).float()

In [9]:
print(X_raw.shape)

torch.Size([3980, 177])


In [10]:

def random_extend(arr, k):
    indices = np.concatenate([np.random.permutation(len(arr)) for _ in range(10)])[:k]
    return arr[indices]

random_extend(X_raw, 5000).shape

torch.Size([5000, 177])

In [11]:

for c in full_df.columns:
    if c not in test.columns:
        test[c] = float('nan')

test = test[full_df.columns]

X_tensor_test = torch.tensor(test.to_numpy()).float()

In [12]:
full_df.columns.get_loc('sii')

58

In [13]:

def get_model_size(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    model_size = sum([np.prod(p.size()) for p in model_parameters])
    return "{}K".format(round(model_size / 1e1) / 1e2)
  
# get_model_size(imputer.model.blocks.layers[:-1])

In [14]:

from sklearn.base import clone

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

from tqdm import tqdm
from sklearn.model_selection import KFold


def random_extend(arr, k):
    indices = np.concatenate([np.random.permutation(len(arr)) for _ in range(10)])[:k]
    return arr[indices]


num_folds = 5

def PerformImpute(imputer_args):
    global X_raw, X_tensor_test, num_folds
    train_S = []
    test_S = []
    
    KF = KFold(n_splits=num_folds, shuffle=True, random_state=SEED)

    oof_non_rounded = []
    oof_rounded = []
    oof_gt = []
    test_preds = np.zeros((len(X_tensor_test), num_folds))
    
    
    lbl_cols = [full_df.columns.get_loc(c) for c in full_df.columns if 'PCIAT' in c or 'sii' in c]
    lbl_idx = lbl_cols[0]
    
    lbled_indices = torch.where(~X_raw[:, lbl_idx].isnan())[0]
    
    
    
    X_raw_no_lbl = X_raw.clone()
    X_raw_no_lbl[:, lbl_cols] = float('nan')
    
    pretrain_args = copy.deepcopy(imputer_args)
    pretrain_args.max_epochs = pretrain_args.pretrain_epochs
    import time
    pretrain_tick = time.time()
    imputer_pretrain = ReMasker(pretrain_args)
    imputer_pretrain.fit(X_raw_no_lbl, None, None, None)
    ellapsed_time = time.time() - pretrain_tick
    print(f"Pretrained in {ellapsed_time:.4f}s.")
    
    pretrain_model = imputer_pretrain.model

    

    pbar = tqdm(KF.split(lbled_indices), desc="Training Folds", total=n_splits)    

    for fold, (train_idx_idx, test_idx_idx) in enumerate(pbar):
        train_idx = lbled_indices[train_idx_idx]
        test_idx = lbled_indices[test_idx_idx]

        X_train = X_raw.clone()
        
        
        
        X_train[test_idx.unsqueeze(1), lbl_cols] = float('nan')
        
        X_val = X_raw[test_idx].clone()

        

        X_train = random_extend(X_train, 9000)
        # X_val = random_extend(X_val, 2000)
        
    
        train_nonna_indices = torch.where(~X_train[:, lbl_idx].isnan())[0]
        val_nonna_indices = torch.where(~X_val[:, lbl_idx].isnan())[0]
        if len(train_nonna_indices) == 0 or len(val_nonna_indices)==0: continue
    
        imputer = ReMasker(imputer_args)
        imputer.fit(X_train, X_val, lbl_cols, pretrain_model)

        y_train_ = X_train[train_nonna_indices, lbl_idx].numpy().astype(int)
        y_val_ = X_val[val_nonna_indices, lbl_idx].numpy().astype(int)

        X_train[:, lbl_cols] = float('nan')
        X_val[:, lbl_cols] = float('nan')
        
        y_train_pred = imputer.predict(X_train[train_nonna_indices], lbl_cols).cpu().detach().numpy()
        y_val_pred = imputer.predict(X_val[val_nonna_indices], lbl_cols).cpu().detach().numpy()
        y_test_pred = imputer.predict(X_tensor_test, lbl_cols).cpu().detach().numpy()

        # model = clone(model_init)

        # model.fit(X_train_, y_train_)

        oof_non_rounded += [y_val_pred]
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded += [y_val_pred_rounded]
        oof_gt += [y_val_]
        
        train_kappa = quadratic_weighted_kappa(y_train_, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val_, y_val_pred.round(0).astype(int))

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        
        test_preds[:, fold] = y_test_pred

        pbar.set_description_str(
          "Fold %d, Train MSE: %.4f, Val MSE: %.4f, Train QWK: %.4f, Val QWK: %.4f" % (
              fold + 1,
              ((y_train_pred - y_train_) ** 2 / 9.).mean(),
              ((y_val_pred - y_val_) ** 2 / 9.).mean(),
              train_kappa,
              val_kappa
          )
        )
    
    
    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    oof_non_rounded = np.concatenate(oof_non_rounded)
    oof_gt = np.concatenate(oof_gt)
    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(oof_gt, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(oof_gt, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")


    # return 0.5 * (np.mean(test_S) + tKappa)
  
  
    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    sample_sub_df = pd.read_csv('../input/child-mind-institute-problematic-internet-use/sample_submission.csv')
    submission = pd.DataFrame({
        'id': sample_sub_df['id'],
        'sii': tpTuned
    })

    return submission



In [15]:
imputer_args = Namespace(
    batch_size=64,
    max_epochs= 50,
    pretrain_epochs=400,
    accum_iter=1,
    mask_ratio=0.75,
    embed_dim=6,
    depth=8,
    decoder_depth=1,
    num_heads=3,
    mlp_ratio=21.5,
    cls_mlp_dim=48,
    dropout=0.5,
    encode_func='linear',
    norm_field_loss=False,
    ema_decay=0.9,
    weight_decay=0.05,
    lr=None, blr=0.001,
    min_lr=1e-05,
    device='cuda', seed=SEED, overwrite=True, pin_mem=True
)
print(imputer_args)



SEED = random.randint(1, int(2e9))
np.random.seed(SEED)
indices = np.random.permutation(len(X_raw))
X_raw = X_raw[indices]
sub1 = PerformImpute(imputer_args)
sub1

Namespace(batch_size=64, max_epochs=50, pretrain_epochs=400, accum_iter=1, mask_ratio=0.75, embed_dim=6, depth=8, decoder_depth=1, num_heads=3, mlp_ratio=21.5, cls_mlp_dim=48, dropout=0.5, encode_func='linear', norm_field_loss=False, ema_decay=0.9, weight_decay=0.05, lr=None, blr=0.001, min_lr=1e-05, device='cuda', seed=314159, overwrite=True, pin_mem=True)
Epoch: 1, loss: -0.3139
Epoch: 40, loss: -0.2113
Epoch: 80, loss: -0.1535
Epoch: 120, loss: -0.0766
Epoch: 160, loss: -0.0644
Epoch: 200, loss: -0.0611
Epoch: 240, loss: -0.0595
Epoch: 280, loss: -0.0582
Epoch: 320, loss: -0.0573
Epoch: 360, loss: -0.0564
Epoch: 400, loss: -0.0564
Loaded best model with loss=0.0564
Pretrained in 1812.8070s.


Training Folds:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, train;val;best qwk: -0.0032;-0.0253;-0.0253, loss: 0.1604, val_loss: 0.0253
Epoch: 5, train;val;best qwk: 0.3683;0.2932;0.2932, loss: 0.0653, val_loss: -0.2932
Epoch: 10, train;val;best qwk: 0.4466;0.3903;0.3932, loss: 0.0587, val_loss: -0.3903
Epoch: 15, train;val;best qwk: 0.5303;0.3865;0.4014, loss: 0.0547, val_loss: -0.3865
Epoch: 20, train;val;best qwk: 0.5878;0.3622;0.4176, loss: 0.0505, val_loss: -0.3622
Epoch: 25, train;val;best qwk: 0.6753;0.3740;0.4176, loss: 0.0472, val_loss: -0.3740
Epoch: 30, train;val;best qwk: 0.7138;0.3826;0.4176, loss: 0.0527, val_loss: -0.3826
Epoch: 35, train;val;best qwk: 0.7268;0.3623;0.4176, loss: 0.0504, val_loss: -0.3623
Epoch: 40, train;val;best qwk: 0.7530;0.3616;0.4176, loss: 0.0489, val_loss: -0.3616
Epoch: 45, train;val;best qwk: 0.7527;0.3593;0.4176, loss: 0.0476, val_loss: -0.3593
Epoch: 50, train;val;best qwk: 0.7609;0.3735;0.4176, loss: 0.0475, val_loss: -0.3735
Loaded best model with loss=-0.4176


Fold 1, Train MSE: 0.0367, Val MSE: 0.0529, Train QWK: 0.5939, Val QWK: 0.4178:  20%|██        | 1/5 [12:00<48:02, 720.63s/it]

Epoch: 1, train;val;best qwk: -0.0087;-0.0256;-0.0256, loss: 0.1610, val_loss: 0.0256
Epoch: 5, train;val;best qwk: 0.3700;0.2847;0.2847, loss: 0.0658, val_loss: -0.2847
Epoch: 10, train;val;best qwk: 0.4536;0.3580;0.3808, loss: 0.0586, val_loss: -0.3580
Epoch: 15, train;val;best qwk: 0.5272;0.3597;0.3808, loss: 0.0553, val_loss: -0.3597
Epoch: 20, train;val;best qwk: 0.5545;0.3202;0.3912, loss: 0.0512, val_loss: -0.3202
Epoch: 25, train;val;best qwk: 0.6333;0.3657;0.3912, loss: 0.0479, val_loss: -0.3657
Epoch: 30, train;val;best qwk: 0.6754;0.3459;0.3912, loss: 0.0521, val_loss: -0.3459
Epoch: 35, train;val;best qwk: 0.7196;0.3378;0.3912, loss: 0.0502, val_loss: -0.3378
Epoch: 40, train;val;best qwk: 0.7362;0.3381;0.3912, loss: 0.0487, val_loss: -0.3381
Epoch: 45, train;val;best qwk: 0.7459;0.3436;0.3912, loss: 0.0476, val_loss: -0.3436
Epoch: 50, train;val;best qwk: 0.7450;0.3462;0.3912, loss: 0.0472, val_loss: -0.3462
Loaded best model with loss=-0.3912


Fold 2, Train MSE: 0.0377, Val MSE: 0.0536, Train QWK: 0.5940, Val QWK: 0.3917:  40%|████      | 2/5 [24:00<36:01, 720.34s/it]

Epoch: 1, train;val;best qwk: -0.0102;0.0121;0.0121, loss: 0.1625, val_loss: -0.0121
Epoch: 5, train;val;best qwk: 0.3176;0.3025;0.3172, loss: 0.0664, val_loss: -0.3025
Epoch: 10, train;val;best qwk: 0.4436;0.3860;0.4168, loss: 0.0597, val_loss: -0.3860
Epoch: 15, train;val;best qwk: 0.5098;0.3902;0.4168, loss: 0.0551, val_loss: -0.3902
Epoch: 20, train;val;best qwk: 0.5802;0.3651;0.4168, loss: 0.0512, val_loss: -0.3651
Epoch: 25, train;val;best qwk: 0.6708;0.3130;0.4168, loss: 0.0479, val_loss: -0.3130
Epoch: 30, train;val;best qwk: 0.7206;0.3279;0.4168, loss: 0.0529, val_loss: -0.3279
Epoch: 35, train;val;best qwk: 0.7213;0.2910;0.4168, loss: 0.0507, val_loss: -0.2910
Epoch: 40, train;val;best qwk: 0.7544;0.3054;0.4168, loss: 0.0491, val_loss: -0.3054
Epoch: 45, train;val;best qwk: 0.7518;0.3053;0.4168, loss: 0.0476, val_loss: -0.3053
Epoch: 50, train;val;best qwk: 0.7593;0.3038;0.4168, loss: 0.0476, val_loss: -0.3038
Loaded best model with loss=-0.4168


Fold 3, Train MSE: 0.0487, Val MSE: 0.0522, Train QWK: 0.4061, Val QWK: 0.4158:  60%|██████    | 3/5 [36:01<24:00, 720.43s/it]

Epoch: 1, train;val;best qwk: -0.0042;-0.0005;-0.0005, loss: 0.1630, val_loss: 0.0005
Epoch: 5, train;val;best qwk: 0.3447;0.3485;0.3485, loss: 0.0657, val_loss: -0.3485
Epoch: 10, train;val;best qwk: 0.4506;0.4314;0.4314, loss: 0.0595, val_loss: -0.4314
Epoch: 15, train;val;best qwk: 0.5558;0.4536;0.4536, loss: 0.0554, val_loss: -0.4536
Epoch: 20, train;val;best qwk: 0.5798;0.3858;0.4536, loss: 0.0521, val_loss: -0.3858
Epoch: 25, train;val;best qwk: 0.6272;0.3811;0.4536, loss: 0.0486, val_loss: -0.3811
Epoch: 30, train;val;best qwk: 0.6858;0.3802;0.4536, loss: 0.0529, val_loss: -0.3802
Epoch: 35, train;val;best qwk: 0.7057;0.3764;0.4536, loss: 0.0509, val_loss: -0.3764
Epoch: 40, train;val;best qwk: 0.7282;0.3769;0.4536, loss: 0.0495, val_loss: -0.3769
Epoch: 45, train;val;best qwk: 0.7270;0.3685;0.4536, loss: 0.0488, val_loss: -0.3685
Epoch: 50, train;val;best qwk: 0.7285;0.3722;0.4536, loss: 0.0482, val_loss: -0.3722
Loaded best model with loss=-0.4536


Fold 4, Train MSE: 0.0419, Val MSE: 0.0522, Train QWK: 0.5556, Val QWK: 0.4536:  80%|████████  | 4/5 [48:00<11:59, 719.77s/it]

Epoch: 1, train;val;best qwk: -0.0114;-0.0037;-0.0037, loss: 0.1604, val_loss: 0.0037
Epoch: 5, train;val;best qwk: 0.3127;0.3558;0.3558, loss: 0.0657, val_loss: -0.3558
Epoch: 10, train;val;best qwk: 0.4442;0.3569;0.3842, loss: 0.0589, val_loss: -0.3569
Epoch: 15, train;val;best qwk: 0.5453;0.3871;0.4184, loss: 0.0543, val_loss: -0.3871
Epoch: 20, train;val;best qwk: 0.6195;0.3714;0.4184, loss: 0.0506, val_loss: -0.3714
Epoch: 25, train;val;best qwk: 0.6670;0.3813;0.4184, loss: 0.0478, val_loss: -0.3813
Epoch: 30, train;val;best qwk: 0.7028;0.3859;0.4184, loss: 0.0536, val_loss: -0.3859
Epoch: 35, train;val;best qwk: 0.7199;0.3821;0.4184, loss: 0.0510, val_loss: -0.3821
Epoch: 40, train;val;best qwk: 0.7384;0.3759;0.4184, loss: 0.0493, val_loss: -0.3759
Epoch: 45, train;val;best qwk: 0.7446;0.3697;0.4184, loss: 0.0477, val_loss: -0.3697
Epoch: 50, train;val;best qwk: 0.7491;0.3726;0.4184, loss: 0.0475, val_loss: -0.3726
Loaded best model with loss=-0.4184


Fold 5, Train MSE: 0.0434, Val MSE: 0.0527, Train QWK: 0.5084, Val QWK: 0.4179: 100%|██████████| 5/5 [59:58<00:00, 719.60s/it]

Mean Train QWK --> 0.5316
Mean Validation QWK ---> 0.4193
----> || Optimized QWK SCORE :: [36m[1m 0.451[0m





Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,1
3,00115b9f,0
4,0016bb22,1
5,001f3379,0
6,0038ba98,0
7,0068a485,0
8,0069fbed,1
9,0083e397,1


In [16]:
def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df

train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

100%|██████████| 996/996 [01:14<00:00, 13.39it/s]
100%|██████████| 2/2 [00:00<00:00, 12.12it/s]


In [17]:
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")

train = pd.merge(train, train_ts, how="left", on='id')
test = pd.merge(test, test_ts, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)   

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df
        
train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train)
    mappingTe = create_mapping(col, test)
    
    train[col] = train[col].replace(mapping).astype(int)
    test[col] = test[col].replace(mappingTe).astype(int)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

# Model parameters for LightGBM
Params = {
    'learning_rate': 0.046,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  # Increased from 6.59
    'lambda_l2': 0.01  # Increased from 2.68e-06
}


# XGBoost parameters
XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  # Increased from 0.1
    'reg_lambda': 5,  # Increased from 1
    'random_state': SEED
}


CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 200,
    'random_seed': SEED,
    'cat_features': cat_c,
    'verbose': 0,
    'l2_leaf_reg': 10  # Increase this value
}

# Create model instances
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])

# Train the ensemble model
sub2 = TrainML(voting_model, test)
sub2

Training Folds: 100%|██████████| 5/5 [00:54<00:00, 10.88s/it]

Mean Train QWK --> 0.7607
Mean Validation QWK ---> 0.3855





----> || Optimized QWK SCORE :: [36m[1m 0.449[0m


Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,1
9,0083e397,0


In [18]:
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")

train = pd.merge(train, train_ts, how="left", on='id')
test = pd.merge(test, test_ts, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df

train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train)
    mappingTe = create_mapping(col, test)
    
    train[col] = train[col].replace(mapping).astype(int)
    test[col] = test[col].replace(mappingTe).astype(int)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tp_rounded = threshold_Rounder(tpm, KappaOPtimizer.x)

    return tp_rounded

imputer = SimpleImputer(strategy='median')

ensemble = VotingRegressor(estimators=[
    ('lgb', Pipeline(steps=[('imputer', imputer), ('regressor', LGBMRegressor(random_state=SEED))])),
    ('xgb', Pipeline(steps=[('imputer', imputer), ('regressor', XGBRegressor(random_state=SEED))])),
    ('cat', Pipeline(steps=[('imputer', imputer), ('regressor', CatBoostRegressor(random_state=SEED, silent=True))])),
    ('rf', Pipeline(steps=[('imputer', imputer), ('regressor', RandomForestRegressor(random_state=SEED))])),
    ('gb', Pipeline(steps=[('imputer', imputer), ('regressor', GradientBoostingRegressor(random_state=SEED))]))
])

sub3 = TrainML(ensemble, test)
sub3 = pd.DataFrame({
    'id': sample['id'],
    'sii': sub3
})

sub3

Training Folds: 100%|██████████| 5/5 [02:13<00:00, 26.79s/it]

Mean Train QWK --> 0.9179
Mean Validation QWK ---> 0.3660





----> || Optimized QWK SCORE :: [36m[1m 0.450[0m


Unnamed: 0,id,sii
0,00008ff9,2
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,2
9,0083e397,0


In [19]:
sub1 = sub1.sort_values(by='id').reset_index(drop=True)
sub2 = sub2.sort_values(by='id').reset_index(drop=True)
sub3 = sub3.sort_values(by='id').reset_index(drop=True)

combined = pd.DataFrame({
    'id': sub1['id'],
    'sii_1': sub1['sii'],
    'sii_2': sub2['sii'],
    'sii_3': sub3['sii'],
})

def majority_vote(row):
    return row.mode()[0]

combined['final_sii'] = combined[['sii_1', 'sii_2', 'sii_3']].apply(majority_vote, axis=1)

final_submission = combined[['id', 'final_sii']].rename(columns={'final_sii': 'sii'})

final_submission.to_csv('submission.csv', index=False)

print("Majority voting completed and saved to 'Final_Submission.csv'")


Majority voting completed and saved to 'Final_Submission.csv'


In [20]:
final_submission


Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,1
9,0083e397,0
