In [1]:
!pip -q install /kaggle/input/pytorchtabnet/pytorch_tabnet-4.1.0-py3-none-any.whl

In [2]:
from pytorch_tabnet.tab_model import TabNetRegressor, TabNetClassifier
import torch

In [3]:
import numpy as np
import pandas as pd
import os
import re
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import polars as pl
import polars.selectors as cs
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator, PercentFormatter
import seaborn as sns

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim

from colorama import Fore, Style
from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.impute import KNNImputer
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

In [4]:
import random
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
seed_everything(2024)

In [5]:
target_labels = ['None', 'Mild', 'Moderate', 'Severe']

In [6]:
season_dtype = pl.Enum(['Spring', 'Summer', 'Fall', 'Winter'])

train = (
    pl.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
    .with_columns(pl.col('^.*Season$').cast(season_dtype))
)

test = (
    pl.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
    .with_columns(pl.col('^.*Season$').cast(season_dtype))
)

train
test

id,Basic_Demos-Enroll_Season,Basic_Demos-Age,Basic_Demos-Sex,CGAS-Season,CGAS-CGAS_Score,Physical-Season,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Season,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-Season,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-Season,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-Season,PAQ_A-PAQ_A_Total,PAQ_C-Season,PAQ_C-PAQ_C_Total,SDS-Season,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-Season,PreInt_EduHx-computerinternet_hoursday
str,enum,i64,i64,enum,i64,enum,f64,f64,f64,f64,i64,i64,i64,enum,i64,i64,i64,enum,i64,i64,f64,i64,f64,i64,i64,i64,f64,i64,f64,i64,f64,i64,enum,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,enum,f64,enum,f64,enum,i64,i64,enum,i64
"""00008ff9""","""Fall""",5,0,"""Winter""",51,"""Fall""",16.877316,46.0,50.8,,,,,,,,,"""Fall""",0,0,,,,,0,0,7.0,0,6.0,0,6.0,1,"""Fall""",2,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1,24.4349,8.89536,38.9177,19.5413,32.6909,,,,,,,,"""Fall""",3
"""000fd460""","""Summer""",9,0,,,"""Fall""",14.03559,48.0,46.0,22.0,75,70,122,,,,,"""Fall""",3,0,,,,,5,0,11.0,1,11.0,1,3.0,0,"""Winter""",2,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1,21.0352,14.974,39.4497,15.4107,27.0552,,,"""Fall""",2.34,"""Fall""",46,64,"""Summer""",0
"""00105258""","""Summer""",10,1,"""Fall""",71,"""Fall""",16.648696,56.5,75.6,,65,94,117,"""Fall""",5,7,33,"""Fall""",20,1,10.2,1,14.7,2,7,1,10.0,1,10.0,1,5.0,0,,,,,,,,,,,,,,,,,,,,"""Summer""",2.17,"""Fall""",38,54,"""Summer""",2
"""00115b9f""","""Winter""",9,0,"""Fall""",71,"""Summer""",18.292347,56.0,81.6,,60,97,117,"""Summer""",6,9,37,"""Summer""",18,1,,,,,5,0,7.0,0,7.0,0,7.0,1,"""Summer""",3,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2,30.4041,16.779,58.9338,26.4798,45.9966,,,"""Winter""",2.451,"""Summer""",31,45,"""Winter""",0
"""0016bb22""","""Spring""",18,1,"""Summer""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"""Summer""",1.04,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""00c0cd71""","""Winter""",7,0,"""Summer""",51,"""Spring""",29.315775,54.0,121.6,,80,75,99,"""Spring""",4,5,32,"""Spring""",6,1,,,,,0,0,12.0,1,15.0,1,12.0,1,,,,,,,,,,,,,,,,,,,,,,"""Spring""",35,50,"""Winter""",2
"""00d56d4b""","""Spring""",5,1,"""Summer""",80,"""Spring""",17.284504,44.0,47.6,,61,76,109,"""Spring""",,,,"""Spring""",0,0,,,,,0,0,10.5,1,10.0,1,7.0,1,,,,,,,,,,,,,,,,,,,,,,"""Spring""",37,53,"""Spring""",0
"""00d9913d""","""Fall""",10,1,,,"""Fall""",19.893157,55.0,85.6,30.0,,81,,,,,,"""Fall""",5,0,,,,,0,0,0.0,0,0.0,0,9.0,1,,,,,,,,,,,,,,,,,,,,,,,,,"""Fall""",1
"""00e6167c""","""Winter""",6,0,"""Spring""",60,"""Winter""",30.094649,37.5,60.2,24.0,61,91,95,,,,,"""Winter""",6,1,,,,,0,0,4.0,0,4.0,0,7.0,1,"""Winter""",2,2.75035,17.2738,1003.07,1504.61,15.1456,49.1034,14.0898,3.18407,11.0966,1,23.6182,10.3396,46.3531,19.8886,38.7638,,,,,"""Winter""",39,55,"""Winter""",3


In [18]:
def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df


class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim*3),
            nn.ReLU(),
            nn.Linear(encoding_dim*3, encoding_dim*2),
            nn.ReLU(),
            nn.Linear(encoding_dim*2, encoding_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim*2),
            nn.ReLU(),
            nn.Linear(input_dim*2, input_dim*3),
            nn.ReLU(),
            nn.Linear(input_dim*3, input_dim),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


def perform_autoencoder(df, encoding_dim=50, epochs=50, batch_size=32):
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)
    
    data_tensor = torch.FloatTensor(df_scaled)
    
    input_dim = data_tensor.shape[1]
    autoencoder = AutoEncoder(input_dim, encoding_dim)
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters())
    
    for epoch in range(epochs):
        for i in range(0, len(data_tensor), batch_size):
            batch = data_tensor[i : i + batch_size]
            optimizer.zero_grad()
            reconstructed = autoencoder(batch)
            loss = criterion(reconstructed, batch)
            loss.backward()
            optimizer.step()
            
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}]')
                 
    with torch.no_grad():
        encoded_data = autoencoder.encoder(data_tensor).numpy()
        
    df_encoded = pd.DataFrame(encoded_data, columns=[f'Enc_{i + 1}' for i in range(encoded_data.shape[1])])
    
    return df_encoded

def feature_engineering(df):
    season_cols = [col for col in df.columns if 'Season' in col]
    df = df.drop(season_cols, axis=1) 
    df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']
    df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']
    df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']
    df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']
    df['FFMI_BFP'] = df['BIA-BIA_FFMI'] / df['BIA-BIA_Fat']
    df['FMI_BFP'] = df['BIA-BIA_FMI'] / df['BIA-BIA_Fat']
    df['LST_TBW'] = df['BIA-BIA_LST'] / df['BIA-BIA_TBW']
    df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR']
    df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE']
    df['BMR_Weight'] = df['BIA-BIA_BMR'] / df['Physical-Weight']
    df['DEE_Weight'] = df['BIA-BIA_DEE'] / df['Physical-Weight']
    df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']
    df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI']
    df['Hydration_Status'] = df['BIA-BIA_TBW'] / df['Physical-Weight']
    df['ICW_TBW'] = df['BIA-BIA_ICW'] / df['BIA-BIA_TBW']
    df['BMI_PHR'] = df['Physical-BMI'] * df['Physical-HeartRate']
    
    return df

In [19]:
train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

100%|██████████| 996/996 [01:13<00:00, 13.56it/s]
100%|██████████| 2/2 [00:00<00:00,  9.16it/s]


In [20]:
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

df_train = train_ts.drop('id', axis=1)
df_test = test_ts.drop('id', axis=1)

train_ts_encoded = perform_autoencoder(df_train, encoding_dim=60, epochs=100, batch_size=32)
test_ts_encoded = perform_autoencoder(df_test, encoding_dim=60, epochs=100, batch_size=32)

time_series_cols = train_ts_encoded.columns.tolist()
train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

train = pd.merge(train, train_ts_encoded, how="left", on='id')
test = pd.merge(test, test_ts_encoded, how="left", on='id')

imputer = KNNImputer(n_neighbors=5)
numeric_cols = train.select_dtypes(include=['float64', 'int64']).columns
imputed_data = imputer.fit_transform(train[numeric_cols])
train_imputed = pd.DataFrame(imputed_data, columns=numeric_cols)
train_imputed['sii'] = train_imputed['sii'].round().astype(int)
for col in train.columns:
    if col not in numeric_cols:
        train_imputed[col] = train[col]
        
train = train_imputed


Epoch [10/100], Loss: 1.6710]
Epoch [20/100], Loss: 1.5469]
Epoch [30/100], Loss: 1.5154]
Epoch [40/100], Loss: 1.4932]
Epoch [50/100], Loss: 1.4964]
Epoch [60/100], Loss: 1.4920]
Epoch [70/100], Loss: 1.4309]
Epoch [80/100], Loss: 1.4185]
Epoch [90/100], Loss: 1.3667]
Epoch [100/100], Loss: 1.3620]
Epoch [10/100], Loss: 1.0070]
Epoch [20/100], Loss: 0.5783]
Epoch [30/100], Loss: 0.4271]
Epoch [40/100], Loss: 0.4271]
Epoch [50/100], Loss: 0.4271]
Epoch [60/100], Loss: 0.4271]
Epoch [70/100], Loss: 0.4271]
Epoch [80/100], Loss: 0.4271]
Epoch [90/100], Loss: 0.4271]
Epoch [100/100], Loss: 0.4271]


In [21]:
featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat']
train = train[[c for c in featuresCols if c in train.columns] + ['sii']]
test = test[[c for c in featuresCols if c in train.columns]]

In [22]:
select_cols = [c for c in train.columns if (not train[c].isnull().values.any()) and (train[c].dtype in ['float64', 'int64', 'float32', 'int32'])]
X_df_train = train[select_cols].drop(columns=['sii'])
y_df_train = train['sii']
X_train = X_df_train.to_numpy()
y_train = y_df_train.to_numpy()

In [23]:
X_tensor = torch.tensor(X_train).float()
y_tensor = torch.tensor(y_train).float()

X_scales = torch.clamp(X_tensor.max(dim=0, keepdim=True).values, 1)

X_train.shape

(3960, 48)

In [24]:
from pytorch_tabnet.tab_model import TabNetClassifier

def create_tabnet(in_dim, out_dim):
    TabNet_Params = {
        'n_d': 64,              # Width of the decision prediction layer
        'n_a': 64,              # Width of the attention embedding for each step
        'n_steps': 5,           # Number of steps in the architecture
        'gamma': 1.5,           # Coefficient for feature selection regularization
        'n_independent': 2,     # Number of independent GLU layer in each GLU block
        'n_shared': 2,          # Number of shared GLU layer in each GLU block
        'mask_type': 'entmax',
    }
    tabnetreg = TabNetClassifier(**TabNet_Params)
    tabnetreg.fit(np.ones((out_dim, in_dim)), np.arange(out_dim), max_epochs=1)
    return tabnetreg.network.train().cuda()

In [25]:
import torch.nn as nn
import torch.nn.functional as F
import copy

def entropy_loss(p_logit):
    p = F.softmax(p_logit, dim=-1)
    return -1 * torch.sum(p * F.log_softmax(p_logit, dim=-1)) / p_logit.size()[0]

class FFN(nn.Module):
    def __init__(self, input_dim, output_dim, encoding_dim=None, act=F.gelu, norm=True):
        super(FFN, self).__init__()
        if encoding_dim is None: encoding_dim = input_dim * 4
        self.fc1 = nn.Linear(input_dim, encoding_dim)
        self.fc_interm = nn.Linear(encoding_dim, encoding_dim)
        self.fc2 = nn.Linear(encoding_dim, output_dim)
        if norm: self.norm = nn.LayerNorm(encoding_dim)
        else: self.norm = nn.Identity()
        self.act = act
        
    def forward(self, x):
        x = self.fc1(x)
        x = x + self.act(self.fc_interm(self.norm(x)))
        x = self.fc2(x)
        return x

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout=0.1):
        super().__init__()
        self.num_layers = num_layers

        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
        self.dropouts = nn.ModuleList(nn.Dropout(dropout) for _ in range(num_layers - 1))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i < self.num_layers - 1:
                x = F.relu(x)
                x = self.dropouts[i](x)
        return x

    def _reset_parameters(self):
        
        for proj in self.layers:
            nn.init.xavier_uniform_(proj.weight, gain=1)
            nn.init.constant_(proj.bias, 0)
            
        self.layers[-1].weight.data *= 0.01
        self.layers[-1].bias.data = torch.ones(4) * -1.098
        
class FeatEng(nn.Module):
    def __init__(self, op='mult'):
        super(FeatEng, self).__init__()
        self.op = op
        self.params = nn.Parameter(torch.randn(3))
        
        
    def forward(self, x):
        if self.op == 'div':
            x = (x[:, [0]] /  (x[:, [1]].abs() + 1e-2))
        elif self.op == 'mult':
            x = x[:, [0]] * x[:, [1]]
        elif self.op == 'relu':
            x = F.relu(x[:, [0]] * self.params[0] + x[:, [1]] * self.params[1] + self.params[2])
        else:
            raise NotImplementedError()
            
        return x

class FeatGen(nn.Module):
    def __init__(self, n_feats, k=2, op='mult'):
        super(FeatGen, self).__init__()

        self.dist_select = nn.ParameterList([
            nn.Parameter(torch.randn(n_feats))
            for _ in range(k)
        ])
        
        self.fc_fe = FeatEng(op)
        self.k = k

        self.trigger = nn.Parameter(torch.randn(2))

        self.op = op
        
    def forward(self, x, fix_choice=False, hard=True, tau=1.):
        feats = []
        for i in range(self.k):
            if fix_choice:
                curr_sel = torch.zeros_like(self.dist_select[i]).to(x.device)
                curr_sel[torch.argmax(self.dist_select[i])] = 1.0
            else:
                if hard: curr_sel = F.gumbel_softmax(self.dist_select[i], tau=tau, hard=True)
                else: curr_sel = F.softmax(self.dist_select[i])
            curr_feat = (x * curr_sel).sum(dim=1, keepdim=True)
            feats.append(curr_feat)
        new_feat = self.fc_fe(torch.concatenate(feats, dim=1))

        if fix_choice: trigger = (self.trigger >= self.trigger[1]).float()
        elif hard: trigger = F.gumbel_softmax(self.trigger, tau=tau, hard=True)
        else: trigger = F.softmax(self.trigger)
        new_feat *= trigger[0]
        
        return new_feat

class FENet(nn.Module):
    def __init__(self, n_feats, max_new_feats=10, k=2):
        super(FENet, self).__init__()

        self.feat_generators = nn.ModuleList(
            [FeatGen(n_feats, k, 'div')
            for _ in range(max_new_feats // 2)]
            + [FeatGen(n_feats, k, 'mult')
            for _ in range(max_new_feats // 2)]
        )
        self.n_feats = n_feats
        width_mult = 1.0
        self.pool1 = nn.ModuleList([
            MLP(n_feats + len(self.feat_generators), int(128 * width_mult), 4, 4, 0.0),
            MLP(n_feats + len(self.feat_generators), int(256 * width_mult), 4, 2, 0.0),
            MLP(n_feats + len(self.feat_generators), int(64  * width_mult), 4, 4, 0.0),
            MLP(n_feats + len(self.feat_generators), int(128 * width_mult), 4, 2, 0.0),
        ])

        self.tabnet1 = create_tabnet(n_feats + len(self.feat_generators), 4)
        
        self.pool2 = nn.ModuleList([
            MLP(len(self.feat_generators), int(128 * width_mult), 4, 4, 0.0),
            MLP(len(self.feat_generators), int(256 * width_mult), 4, 2, 0.0),
            MLP(len(self.feat_generators), int(64  * width_mult), 4, 4, 0.0),
            MLP(len(self.feat_generators), int(128 * width_mult), 4, 2, 0.0),
        ])

        self.tabnet2 = create_tabnet(len(self.feat_generators), 4)

        self._reset_all_clf_parameters()
        self.tau = 10.
        
    def _reset_all_clf_parameters(self):
        for i in range(len(self.pool1)):
            self.pool1[i]._reset_parameters()
        self.tabnet1 = create_tabnet(self.n_feats + len(self.feat_generators), 4)
        for i in range(len(self.pool2)):
            self.pool2[i]._reset_parameters()
        self.tabnet2 = create_tabnet(len(self.feat_generators), 4)
    
        
    def forward(self, x, fix_choice=False, hard=True, tau=1.):
        new_feats = []
        for i, feat_gen in enumerate(self.feat_generators):
            new_feat = feat_gen(x, fix_choice, hard, tau)
            new_feats.append(new_feat)
        new_feats = torch.concatenate(new_feats, dim=1)
        return new_feats

    def calculate_entropy(self):
        ent = 0.0
        cnt = 0.0
        for i, feat_gen in enumerate(self.feat_generators):
            for j, dist in enumerate(feat_gen.dist_select):
                ent += entropy_loss(dist)
                cnt += 1
        ent /= cnt
        return ent
        
        
    def forward_sl(self, x, y, fix_choice=False, hard=True):
        new_feats = self.forward(x, fix_choice, hard, self.tau)
        all_feats = torch.concatenate([x, new_feats], dim=1)
        loss = 0.0
        
        for clf in self.pool1:
            loss += F.cross_entropy(clf(all_feats), y.long())
        logits, M_loss = self.tabnet1(all_feats)
        loss += F.cross_entropy(logits, y.long()) + M_loss

        for clf in self.pool2:
            loss += F.cross_entropy(clf(new_feats), y.long())
        logits, M_loss = self.tabnet2(new_feats)
        loss += F.cross_entropy(logits, y.long()) + M_loss
        
        if not fix_choice:
            loss += 0.1 * self.calculate_entropy()
        return loss



In [55]:
def print_features(fe_model):
    fe_pred = fe_model.cuda().forward((X_tensor / X_scales).cuda(), True, True)
    
    gt_df = train.copy()
    for ci, c in enumerate(X_df_train.columns):
        gt_df[c] /= X_scales.numpy()[:, ci]
    
    mses = []
    for newfeat_idx in range(fe_pred.shape[1]):
        fg = fe_model.feat_generators[newfeat_idx]
        feat_id = fg.dist_select[0].argmax().item()
        feat_jd = fg.dist_select[1].argmax().item()
        feat_i = X_df_train.columns[feat_id]
        feat_j = X_df_train.columns[feat_jd]
        if fg.op == 'mult':
            gt = (gt_df[feat_i] * gt_df[feat_j]).to_numpy()
        if fg.op == 'div':
            gt = (gt_df[feat_i] / (gt_df[feat_j].abs() + 1e-2)).to_numpy()

        trigger = (fg.trigger[0] > fg.trigger[1]).bool().item()
        if trigger:
            yp = fe_pred[:, newfeat_idx].cpu().numpy()
            mses += [np.mean((yp - gt)**2)]
        
        if fg.op == 'mult':
            print(f'df["Feat_{newfeat_idx}"] = df["{feat_i}"] * df["{feat_j}"]; trigger = {trigger}')
        if fg.op == 'div':
            print(f'df["Feat_{newfeat_idx}"] = df["{feat_i}"] / df["{feat_j}"]; trigger = {trigger}')
    
    print(f'dbg mse: {max(mses):.4f}')
    
def initialize_features(feat_tuples, n_feats, max_new_feats=10):
    
    def initialize_feat_gen(feat_id, feat_jd, op):
        fg = FeatGen(n_feats, 2, op)
        fg.dist_select[0].data *= 0.1
        fg.dist_select[0].data -= 0.5
        fg.dist_select[0][feat_id].data += 1.0
        
        fg.dist_select[1].data *= 0.1
        fg.dist_select[1].data -= 0.5
        fg.dist_select[1][feat_jd].data += 1.0
        fg.trigger.data = torch.tensor([.5, -.5])
        return fg
    
    feat_generators = []
    
    for feat_i, feat_j, op in feat_tuples:
        feat_id = X_df_train.columns.get_loc(feat_i)
        feat_jd = X_df_train.columns.get_loc(feat_j)
        fg = initialize_feat_gen(feat_id, feat_jd, op)
        feat_generators += [fg]

    while len(feat_generators) + 2 <= max_new_feats:
        feat_generators += [FeatGen(n_feats, 2, 'mult'), FeatGen(n_feats, 2, 'div')]
    feat_generators = nn.ModuleList(feat_generators)
    fe_model = FENet(n_feats, max_new_feats)
    fe_model.feat_generators = feat_generators
    return fe_model
        

In [27]:
fe_model_init = initialize_features(
    [
        ('Physical-BMI', 'Basic_Demos-Age', 'mult'),
        ('PreInt_EduHx-computerinternet_hoursday', 'Basic_Demos-Age', 'mult'),
        ('Physical-BMI', 'PreInt_EduHx-computerinternet_hoursday', 'mult'),
        ('BIA-BIA_Fat', 'BIA-BIA_BMI', 'div'),
        ('BIA-BIA_FFMI', 'BIA-BIA_Fat', 'div'),
        ('BIA-BIA_FMI', 'BIA-BIA_Fat', 'div'),
        ('BIA-BIA_LST', 'BIA-BIA_TBW', 'div'),
        ('BIA-BIA_Fat', 'BIA-BIA_BMR', 'mult'),
        ('BIA-BIA_Fat', 'BIA-BIA_DEE', 'mult'),
        ('BIA-BIA_BMR', 'Physical-Weight', 'div'),
        ('BIA-BIA_DEE', 'Physical-Weight', 'div'),
        ('BIA-BIA_SMM', 'Physical-Height', 'div'),
        ('BIA-BIA_SMM', 'BIA-BIA_FMI', 'div'),
        ('BIA-BIA_TBW', 'Physical-Weight', 'div'),
        ('BIA-BIA_ICW', 'BIA-BIA_TBW', 'div'),
        ('Physical-BMI', 'Physical-HeartRate', 'mult'),
    ],
    X_train.shape[1], 20
)
print_features(fe_model_init)

epoch 0  | loss: 0.0     |  0:00:00s
epoch 0  | loss: 0.0     |  0:00:00s
epoch 0  | loss: 0.0     |  0:00:00s
epoch 0  | loss: 0.0     |  0:00:00s
df["Feat_0"] = df["Physical-BMI"] * df["Basic_Demos-Age"]; trigger = True
df["Feat_1"] = df["PreInt_EduHx-computerinternet_hoursday"] * df["Basic_Demos-Age"]; trigger = True
df["Feat_2"] = df["Physical-BMI"] * df["PreInt_EduHx-computerinternet_hoursday"]; trigger = True
df["Feat_3"] = df["BIA-BIA_Fat"] / df["BIA-BIA_BMI"]; trigger = True
df["Feat_4"] = df["BIA-BIA_FFMI"] / df["BIA-BIA_Fat"]; trigger = True
df["Feat_5"] = df["BIA-BIA_FMI"] / df["BIA-BIA_Fat"]; trigger = True
df["Feat_6"] = df["BIA-BIA_LST"] / df["BIA-BIA_TBW"]; trigger = True
df["Feat_7"] = df["BIA-BIA_Fat"] * df["BIA-BIA_BMR"]; trigger = True
df["Feat_8"] = df["BIA-BIA_Fat"] * df["BIA-BIA_DEE"]; trigger = True
df["Feat_9"] = df["BIA-BIA_BMR"] / df["Physical-Weight"]; trigger = True
df["Feat_10"] = df["BIA-BIA_DEE"] / df["Physical-Weight"]; trigger = True
df["Feat_11"] = df[

In [28]:
X_np = (X_tensor / X_scales).cpu().numpy()
y_np = (y_tensor).long().cpu().numpy()

In [29]:
def linear_annealing(step, begin_val, end_val, total_steps):
    return begin_val + (end_val - begin_val) * (step / total_steps)


In [30]:

def get_model_size(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    model_size = sum([np.prod(p.size()) for p in model_parameters])
    return "{}K".format(round(model_size / 1e+1) / 1e2)

print(get_model_size(fe_model_init))


1070.16K


In [44]:
print('k')

k


In [31]:
sum([p.numel() for n, p in fe_model_init.named_parameters() if "feat_generators" not in n and p.requires_grad])

1068144

In [32]:
from tqdm import tqdm
import copy
fe_model = copy.deepcopy(fe_model_init)

param_dicts = [
    {
        "params": [p for n, p in fe_model.named_parameters() if "feat_generators" in n and p.requires_grad],
        "lr": 1e-1,
    },
    {
        "params": [p for n, p in fe_model.named_parameters() if "feat_generators" not in n and p.requires_grad],
        "lr": 1e-3,
    }
]


ema_loss = None

X_scales = torch.clamp(X_tensor.max(dim=0, keepdim=True).values, 1)
X_tensor_scaled = X_tensor / X_scales


batch_size=500



fe_model.train().cpu()
fe_model._reset_all_clf_parameters()
fe_model.train().cuda()

optimizer = optim.Adam(param_dicts)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 200, gamma=0.5)
epochs = 200
fe_model.cuda().train()
pbar = tqdm(range(epochs), delay=1.0, position=0, leave=True)
for epoch in pbar:
    for i in range(0, len(X_tensor), batch_size):
        batch = X_tensor_scaled[i : i + batch_size].cuda()
        labels = y_tensor[i : i + len(batch)].cuda()
        optimizer.zero_grad()
        loss = fe_model.forward_sl(batch, labels, fix_choice=False, hard=False)
        loss.backward()
        optimizer.step()
        # grad = torch.nn.utils.clip_grad_norm_(fe_model.parameters(), .1)

        if ema_loss is None: ema_loss = loss.item()
        ema_loss = 0.9 * ema_loss + 0.1 * loss.item()
        if i % (batch_size * 10) == 0: 
            pbar.set_description_str(f'loss: {ema_loss:.4f}, tau: {fe_model.tau:.4f}, ent: {fe_model.calculate_entropy():.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')
    pbar.set_description_str(f'loss: {ema_loss:.4f}, tau: {fe_model.tau:.4f}, ent: {fe_model.calculate_entropy():.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')

fe_model.train().cpu()
fe_model._reset_all_clf_parameters()
fe_model.train().cuda()

optimizer = optim.Adam(param_dicts, lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1000, gamma=0.5)
epochs = 5000
pbar = tqdm(range(epochs), delay=1.0, position=0, leave=True)
for epoch in pbar:
    fe_model.tau = linear_annealing(epoch, 2., .1, epochs)
    for i in range(0, len(X_tensor), batch_size):
        batch = X_tensor_scaled[i : i + batch_size].cuda()
        labels = y_tensor[i : i + batch_size].cuda()
        optimizer.zero_grad()
        loss = fe_model.forward_sl(batch, labels, fix_choice=False, hard=True)
        loss.backward()
        optimizer.step()
        # grad = torch.nn.utils.clip_grad_norm_(fe_model.parameters(), .1)

        if ema_loss is None: ema_loss = loss.item()
        ema_loss = 0.9 * ema_loss + 0.1 * loss.item()
        if i % (batch_size * 10) == 0: 
            pbar.set_description_str(f'loss: {ema_loss:.4f}, tau: {fe_model.tau:.4f}, ent: {fe_model.calculate_entropy():.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')
    pbar.set_description_str(f'loss: {ema_loss:.4f}, tau: {fe_model.tau:.4f}, ent: {fe_model.calculate_entropy():.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')
    scheduler.step()
    if epoch % 100 == 0: print(f'Ep {epoch}, loss={ema_loss:.4f}')

epoch 0  | loss: 0.0     |  0:00:00s
epoch 0  | loss: 0.0     |  0:00:00s


loss: 10.4131, tau: 10.0000, ent: 0.0230, lr: 1.000e-03: 100%|██████████| 200/200 [02:49<00:00,  1.18it/s]


epoch 0  | loss: 0.0     |  0:00:00s
epoch 0  | loss: 0.0     |  0:00:00s


loss: 12.5922, tau: 1.9996, ent: 0.0221, lr: 1.000e-03:   0%|          | 0/5000 [00:01<?, ?it/s]

Ep 0, loss=12.4985


loss: 11.7800, tau: 1.9616, ent: 0.0235, lr: 1.000e-03:   2%|▏         | 101/5000 [01:34<1:15:41,  1.08it/s]

Ep 100, loss=11.7925


loss: 11.5742, tau: 1.9236, ent: 0.0152, lr: 1.000e-03:   4%|▍         | 201/5000 [03:07<1:13:08,  1.09it/s]

Ep 200, loss=11.5939


loss: 11.2333, tau: 1.8856, ent: 0.0096, lr: 1.000e-03:   6%|▌         | 301/5000 [04:41<1:11:55,  1.09it/s]

Ep 300, loss=11.2468


loss: 11.1048, tau: 1.8476, ent: 0.0084, lr: 1.000e-03:   8%|▊         | 401/5000 [06:15<1:10:31,  1.09it/s]

Ep 400, loss=11.0983


loss: 10.6449, tau: 1.8096, ent: 0.0070, lr: 1.000e-03:  10%|█         | 501/5000 [07:48<1:09:03,  1.09it/s]

Ep 500, loss=10.6448


loss: 10.4590, tau: 1.7716, ent: 0.0059, lr: 1.000e-03:  12%|█▏        | 601/5000 [09:22<1:06:28,  1.10it/s]

Ep 600, loss=10.3871


loss: 10.2920, tau: 1.7336, ent: 0.0045, lr: 1.000e-03:  14%|█▍        | 701/5000 [10:55<1:05:40,  1.09it/s]

Ep 700, loss=10.2688


loss: 10.3438, tau: 1.6956, ent: 0.0045, lr: 1.000e-03:  16%|█▌        | 801/5000 [12:28<1:02:44,  1.12it/s]

Ep 800, loss=10.3097


loss: 9.5437, tau: 1.6576, ent: 0.0034, lr: 1.000e-03:  18%|█▊        | 901/5000 [14:01<1:01:47,  1.11it/s] 

Ep 900, loss=9.5352


loss: 9.5930, tau: 1.6196, ent: 0.0035, lr: 5.000e-04:  20%|██        | 1001/5000 [15:34<1:01:50,  1.08it/s]

Ep 1000, loss=9.5488


loss: 9.3192, tau: 1.5816, ent: 0.0036, lr: 5.000e-04:  22%|██▏       | 1101/5000 [17:06<1:01:17,  1.06it/s]

Ep 1100, loss=9.2772


loss: 9.3712, tau: 1.5436, ent: 0.0049, lr: 5.000e-04:  24%|██▍       | 1201/5000 [18:39<59:37,  1.06it/s]  

Ep 1200, loss=9.3378


loss: 9.4511, tau: 1.5056, ent: 0.0049, lr: 5.000e-04:  26%|██▌       | 1301/5000 [20:11<58:41,  1.05it/s]  

Ep 1300, loss=9.3466


loss: 9.0421, tau: 1.4676, ent: 0.0042, lr: 5.000e-04:  28%|██▊       | 1401/5000 [21:43<55:04,  1.09it/s]  

Ep 1400, loss=9.0086


loss: 8.9150, tau: 1.4296, ent: 0.0050, lr: 5.000e-04:  30%|███       | 1501/5000 [23:16<53:20,  1.09it/s]  

Ep 1500, loss=8.9126


loss: 8.6218, tau: 1.3916, ent: 0.0052, lr: 5.000e-04:  32%|███▏      | 1601/5000 [24:50<57:17,  1.01s/it]  

Ep 1600, loss=8.5974


loss: 8.5532, tau: 1.3536, ent: 0.0052, lr: 5.000e-04:  34%|███▍      | 1701/5000 [26:24<53:27,  1.03it/s]

Ep 1700, loss=8.5357


loss: 8.8086, tau: 1.3156, ent: 0.0043, lr: 5.000e-04:  36%|███▌      | 1801/5000 [27:57<48:41,  1.09it/s]

Ep 1800, loss=8.8004


loss: 8.4957, tau: 1.2776, ent: 0.0038, lr: 5.000e-04:  38%|███▊      | 1901/5000 [29:29<46:10,  1.12it/s]

Ep 1900, loss=8.4724


loss: 8.6213, tau: 1.2396, ent: 0.0041, lr: 2.500e-04:  40%|████      | 2001/5000 [31:01<44:36,  1.12it/s]

Ep 2000, loss=8.5921


loss: 8.6246, tau: 1.2016, ent: 0.0041, lr: 2.500e-04:  42%|████▏     | 2101/5000 [32:34<44:46,  1.08it/s]

Ep 2100, loss=8.5946


loss: 8.5914, tau: 1.1636, ent: 0.0043, lr: 2.500e-04:  44%|████▍     | 2201/5000 [34:06<42:25,  1.10it/s]

Ep 2200, loss=8.5576


loss: 8.5190, tau: 1.1256, ent: 0.0041, lr: 2.500e-04:  46%|████▌     | 2301/5000 [35:39<41:42,  1.08it/s]

Ep 2300, loss=8.5071


loss: 8.4629, tau: 1.0876, ent: 0.0042, lr: 2.500e-04:  48%|████▊     | 2401/5000 [37:11<40:07,  1.08it/s]

Ep 2400, loss=8.4435


loss: 8.3282, tau: 1.0496, ent: 0.0035, lr: 2.500e-04:  50%|█████     | 2501/5000 [38:44<38:31,  1.08it/s]

Ep 2500, loss=8.3240


loss: 8.4163, tau: 1.0116, ent: 0.0036, lr: 2.500e-04:  52%|█████▏    | 2601/5000 [40:16<35:53,  1.11it/s]

Ep 2600, loss=8.4022


loss: 8.2164, tau: 0.9736, ent: 0.0031, lr: 2.500e-04:  54%|█████▍    | 2701/5000 [41:48<34:38,  1.11it/s]

Ep 2700, loss=8.1961


loss: 8.3494, tau: 0.9356, ent: 0.0039, lr: 2.500e-04:  56%|█████▌    | 2801/5000 [43:20<34:44,  1.06it/s]

Ep 2800, loss=8.3238


loss: 8.1811, tau: 0.8976, ent: 0.0034, lr: 2.500e-04:  58%|█████▊    | 2901/5000 [44:54<31:51,  1.10it/s]

Ep 2900, loss=8.1550


loss: 8.4213, tau: 0.8596, ent: 0.0040, lr: 1.250e-04:  60%|██████    | 3001/5000 [46:26<31:31,  1.06it/s]

Ep 3000, loss=8.4305


loss: 8.2814, tau: 0.8216, ent: 0.0038, lr: 1.250e-04:  62%|██████▏   | 3101/5000 [47:58<28:34,  1.11it/s]

Ep 3100, loss=8.2830


loss: 8.1175, tau: 0.7836, ent: 0.0038, lr: 1.250e-04:  64%|██████▍   | 3201/5000 [49:31<28:11,  1.06it/s]

Ep 3200, loss=8.1195


loss: 8.1497, tau: 0.7456, ent: 0.0038, lr: 1.250e-04:  66%|██████▌   | 3301/5000 [51:03<26:20,  1.07it/s]

Ep 3300, loss=8.1309


loss: 8.2088, tau: 0.7076, ent: 0.0034, lr: 1.250e-04:  68%|██████▊   | 3401/5000 [52:35<25:00,  1.07it/s]

Ep 3400, loss=8.1844


loss: 7.9446, tau: 0.6696, ent: 0.0032, lr: 1.250e-04:  70%|███████   | 3501/5000 [54:07<23:27,  1.07it/s]

Ep 3500, loss=7.9438


loss: 7.9807, tau: 0.6316, ent: 0.0033, lr: 1.250e-04:  72%|███████▏  | 3601/5000 [55:39<21:02,  1.11it/s]

Ep 3600, loss=7.9607


loss: 7.9381, tau: 0.5936, ent: 0.0031, lr: 1.250e-04:  74%|███████▍  | 3701/5000 [57:12<19:38,  1.10it/s]

Ep 3700, loss=7.9567


loss: 7.9211, tau: 0.5556, ent: 0.0029, lr: 1.250e-04:  76%|███████▌  | 3801/5000 [58:44<18:02,  1.11it/s]

Ep 3800, loss=7.9541


loss: 8.0604, tau: 0.5176, ent: 0.0029, lr: 1.250e-04:  78%|███████▊  | 3901/5000 [1:00:17<16:54,  1.08it/s]

Ep 3900, loss=8.0239


loss: 8.0979, tau: 0.4796, ent: 0.0029, lr: 6.250e-05:  80%|████████  | 4001/5000 [1:01:50<15:04,  1.10it/s]

Ep 4000, loss=8.1209


loss: 8.0287, tau: 0.4416, ent: 0.0029, lr: 6.250e-05:  82%|████████▏ | 4101/5000 [1:03:22<13:33,  1.11it/s]

Ep 4100, loss=8.0107


loss: 7.9543, tau: 0.4036, ent: 0.0030, lr: 6.250e-05:  84%|████████▍ | 4201/5000 [1:04:55<12:22,  1.08it/s]

Ep 4200, loss=7.9226


loss: 7.8416, tau: 0.3656, ent: 0.0031, lr: 6.250e-05:  86%|████████▌ | 4301/5000 [1:06:27<10:39,  1.09it/s]

Ep 4300, loss=7.8388


loss: 8.0316, tau: 0.3276, ent: 0.0030, lr: 6.250e-05:  88%|████████▊ | 4401/5000 [1:07:59<09:12,  1.08it/s]

Ep 4400, loss=8.0179


loss: 7.8889, tau: 0.2896, ent: 0.0030, lr: 6.250e-05:  90%|█████████ | 4501/5000 [1:09:32<07:36,  1.09it/s]

Ep 4500, loss=7.7569


loss: 7.7963, tau: 0.2516, ent: 0.0029, lr: 6.250e-05:  92%|█████████▏| 4601/5000 [1:11:04<06:13,  1.07it/s]

Ep 4600, loss=7.7961


loss: 7.8763, tau: 0.2136, ent: 0.0030, lr: 6.250e-05:  94%|█████████▍| 4701/5000 [1:12:36<04:32,  1.10it/s]

Ep 4700, loss=7.8682


loss: 8.0707, tau: 0.1756, ent: 0.0029, lr: 6.250e-05:  96%|█████████▌| 4801/5000 [1:14:08<03:02,  1.09it/s]

Ep 4800, loss=8.0957


loss: 8.0026, tau: 0.1376, ent: 0.0030, lr: 6.250e-05:  98%|█████████▊| 4901/5000 [1:15:41<01:34,  1.05it/s]

Ep 4900, loss=8.0328


loss: 7.9498, tau: 0.1004, ent: 0.0030, lr: 6.250e-05: 100%|██████████| 5000/5000 [1:17:12<00:00,  1.08it/s]


In [45]:

# fe_model.train().cpu()
# fe_model._reset_all_clf_parameters()
# fe_model.train().cuda()

# optimizer = optim.Adam(fe_model.parameters(), lr=1e-3)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 200, gamma=0.5)
# epochs = 500
# batch_size=500
# pbar = tqdm(range(epochs), delay=1.0, position=0, leave=True)
# for epoch in pbar:
#     for i in range(0, len(X_tensor), batch_size):
#         batch = X_tensor_scaled[i : i + batch_size].cuda()
#         labels = y_tensor[i : i + batch_size].cuda()
#         optimizer.zero_grad()
#         loss = fe_model.forward_sl(batch, labels, fix_choice=True, hard=True)
#         loss.backward()
#         optimizer.step()
#         # grad = torch.nn.utils.clip_grad_norm_(fe_model.parameters(), .1)

#         if ema_loss is None: ema_loss = loss.item()
#         ema_loss = 0.9 * ema_loss + 0.1 * loss.item()
#         if i % (batch_size * 10) == 0: 
#             pbar.set_description_str(f'loss: {ema_loss:.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')
#     pbar.set_description_str(f'loss: {ema_loss:.4f}, lr: {scheduler.get_last_lr()[-1]:.3e}')


In [46]:
if np.any(np.isinf(train)):
    train = train.replace([np.inf, -np.inf], np.nan)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

In [47]:

fe_model.eval().cuda()
with torch.no_grad():
    total_loss1 = fe_model.forward_sl((X_tensor / X_scales).cuda(), y_tensor.cuda(), False, False)
    total_loss2 = fe_model.forward_sl((X_tensor / X_scales).cuda(), y_tensor.cuda(), False, True)
    total_loss3 = fe_model.forward_sl((X_tensor / X_scales).cuda(), y_tensor.cuda(), True, True)
print(f'Losses: {total_loss1.item():.4f}, {total_loss2.item():.4f}, {total_loss3.item():.4f}')

Losses: 4.3793, 12.6017, 5.7148


In [48]:
X1 = (X_tensor/X_scales).cpu().numpy()
fe_model.eval().cuda()
with torch.no_grad():
    X2 = (X_tensor / X_scales).cuda()
    new_feats = fe_model.forward(X2, True, True, 0.1)
    X2 = torch.concatenate([X2, new_feats], dim=1).cpu().numpy()


In [49]:

XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  # Increased from 0.1
    'reg_lambda': 5,  # Increased from 1
    'random_state': SEED,
    'tree_method': 'gpu_hist',

}
XGB_Model1 = XGBRegressor(**XGB_Params)
XGB_Model2 = XGBRegressor(**XGB_Params)

In [50]:
XGB_Model1.fit(X1, y_np)
yp1 = XGB_Model1.predict(X1)
(yp1.round(0) == y_np).mean()

0.8957070707070707

In [51]:
XGB_Model1.fit(X2, y_np)
yp2 = XGB_Model1.predict(X2)
(yp2.round(0) == y_np).mean()

0.9005050505050505

In [52]:
len(fe_model_init.feat_generators)

20

In [56]:
print_features(fe_model_init)

df["Feat_0"] = df["Physical-BMI"] * df["Basic_Demos-Age"]; trigger = True
df["Feat_1"] = df["PreInt_EduHx-computerinternet_hoursday"] * df["Basic_Demos-Age"]; trigger = True
df["Feat_2"] = df["Physical-BMI"] * df["PreInt_EduHx-computerinternet_hoursday"]; trigger = True
df["Feat_3"] = df["BIA-BIA_Fat"] / df["BIA-BIA_BMI"]; trigger = True
df["Feat_4"] = df["BIA-BIA_FFMI"] / df["BIA-BIA_Fat"]; trigger = True
df["Feat_5"] = df["BIA-BIA_FMI"] / df["BIA-BIA_Fat"]; trigger = True
df["Feat_6"] = df["BIA-BIA_LST"] / df["BIA-BIA_TBW"]; trigger = True
df["Feat_7"] = df["BIA-BIA_Fat"] * df["BIA-BIA_BMR"]; trigger = True
df["Feat_8"] = df["BIA-BIA_Fat"] * df["BIA-BIA_DEE"]; trigger = True
df["Feat_9"] = df["BIA-BIA_BMR"] / df["Physical-Weight"]; trigger = True
df["Feat_10"] = df["BIA-BIA_DEE"] / df["Physical-Weight"]; trigger = True
df["Feat_11"] = df["BIA-BIA_SMM"] / df["Physical-Height"]; trigger = True
df["Feat_12"] = df["BIA-BIA_SMM"] / df["BIA-BIA_FMI"]; trigger = True
df["Feat_13"] = df["BIA

In [57]:
print_features(fe_model)

df["Feat_0"] = df["BIA-BIA_Fat"] * df["BIA-BIA_Fat"]; trigger = True
df["Feat_1"] = df["BIA-BIA_LDM"] * df["BIA-BIA_ECW"]; trigger = True
df["Feat_2"] = df["Physical-Diastolic_BP"] * df["BIA-BIA_Frame_num"]; trigger = True
df["Feat_3"] = df["PAQ_A-PAQ_A_Total"] / df["FGC-FGC_SRL"]; trigger = True
df["Feat_4"] = df["BIA-BIA_BMR"] / df["SDS-SDS_Total_T"]; trigger = True
df["Feat_5"] = df["BIA-BIA_FMI"] / df["Physical-Diastolic_BP"]; trigger = True
df["Feat_6"] = df["FGC-FGC_PU_Zone"] / df["FGC-FGC_SRR_Zone"]; trigger = True
df["Feat_7"] = df["BIA-BIA_BMI"] * df["BIA-BIA_Fat"]; trigger = True
df["Feat_8"] = df["FGC-FGC_SRL"] * df["BIA-BIA_SMM"]; trigger = True
df["Feat_9"] = df["BIA-BIA_LDM"] / df["FGC-FGC_SRL"]; trigger = False
df["Feat_10"] = df["FGC-FGC_CU"] / df["PreInt_EduHx-computerinternet_hoursday"]; trigger = True
df["Feat_11"] = df["BIA-BIA_ECW"] / df["BIA-BIA_FMI"]; trigger = True
df["Feat_12"] = df["Fitness_Endurance-Time_Sec"] / df["SDS-SDS_Total_Raw"]; trigger = False
df["Fe