In [1]:
import numpy as np
import pandas as pd
import os
import re
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from colorama import Fore, Style
from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline
from scipy import stats

In [2]:
# Create output folders
output_folder = 'output'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Create separate analysis output folders
analysis_output_folder = 'analysis_output'
os.makedirs(analysis_output_folder, exist_ok=True)

physical_analysis_output_folder = 'analysis_output/physical'
os.makedirs(physical_analysis_output_folder, exist_ok=True)

fitness_analysis_output_folder = 'analysis_output/fitness'
os.makedirs(fitness_analysis_output_folder, exist_ok=True)

bia_analysis_output_folder = 'analysis_output/bia'
os.makedirs(bia_analysis_output_folder, exist_ok=True)

child_info_analysis_output_folder = 'analysis_output/child_info'
os.makedirs(child_info_analysis_output_folder, exist_ok=True)

actigraphy_analysis_output_folder = 'analysis_output/actigraphy'
os.makedirs(actigraphy_analysis_output_folder, exist_ok=True)

# Set display all columns in dataframes property
pd.options.display.max_columns = None

# Supress warnings
warnings.filterwarnings('ignore')# Create output folders
output_folder = 'output'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Create separate analysis output folders
analysis_output_folder = 'analysis_output'
os.makedirs(analysis_output_folder, exist_ok=True)

physical_analysis_output_folder = 'analysis_output/physical'
os.makedirs(physical_analysis_output_folder, exist_ok=True)

fitness_analysis_output_folder = 'analysis_output/fitness'
os.makedirs(fitness_analysis_output_folder, exist_ok=True)

bia_analysis_output_folder = 'analysis_output/bia'
os.makedirs(bia_analysis_output_folder, exist_ok=True)

child_info_analysis_output_folder = 'analysis_output/child_info'
os.makedirs(child_info_analysis_output_folder, exist_ok=True)

actigraphy_analysis_output_folder = 'analysis_output/actigraphy'
os.makedirs(actigraphy_analysis_output_folder, exist_ok=True)

# Set display all columns in dataframes property
pd.options.display.max_columns = None

# Supress warnings
warnings.filterwarnings('ignore')

In [3]:
# Load and process data files
def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

# Load time series data
def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df

In [4]:
# Load data
train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')
sample = pd.read_csv('input/sample_submission.csv')

train_ts = load_time_series("input/series_train.parquet")
test_ts = load_time_series("input/series_test.parquet")

df_train = train_ts.drop('id', axis=1)
df_test = test_ts.drop('id', axis=1)

100%|██████████| 996/996 [01:15<00:00, 13.14it/s]
100%|██████████| 2/2 [00:00<00:00, 10.92it/s]


In [5]:
# Sparse Autoencoder Model
class SparseAutoencoder(nn.Module):
    def __init__(self, input_dim, sparsity_weight=1e-5):
        super(SparseAutoencoder, self).__init__()
        self.sparsity_weight = sparsity_weight
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim),
            nn.Sigmoid()  # Outputs in the range [0, 1]
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Preparing Data
# Option to use different scalers: MinMaxScaler, StandardScaler, RobustScaler
def prepare_data(data, scaler_type='MinMaxScaler'):
    if scaler_type == 'StandardScaler':
        scaler = StandardScaler()
    elif scaler_type == 'RobustScaler':
        scaler = RobustScaler()
    else:
        scaler = MinMaxScaler()
    
    data_scaled = scaler.fit_transform(data)
    return torch.tensor(data_scaled, dtype=torch.float32), scaler

# Apply PCA for Dimensionality Reduction
# This can help focus the autoencoder on the most relevant features
def apply_pca(data, n_components=0.95):
    pca = PCA(n_components=n_components)
    data_pca = pca.fit_transform(data)
    return data_pca, pca

# Early Stopping Functionality
def early_stopping(patience):
    class EarlyStopping:
        def __init__(self, patience=patience):
            self.patience = patience
            self.counter = 0
            self.best_loss = float('inf')
            self.early_stop = False
        
        def __call__(self, loss):
            if loss < self.best_loss:
                self.best_loss = loss
                self.counter = 0
            else:
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
    return EarlyStopping()

# Training the Sparse Autoencoder with DataFrame Output
def perform_autoencoder(data, epochs=100, batch_size=32, learning_rate=0.001, patience=10, scaler_type='MinMaxScaler', use_pca=False, sparsity_weight=1e-5):
    # Preprocess Data
    if use_pca:
        data, pca = apply_pca(data)

    data_tensor, scaler = prepare_data(data, scaler_type=scaler_type)
    train_data, val_data = train_test_split(data_tensor, test_size=0.2, random_state=42)

    train_loader = DataLoader(TensorDataset(train_data), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_data), batch_size=batch_size, shuffle=False)

    model = SparseAutoencoder(input_dim=data.shape[1], sparsity_weight=sparsity_weight)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    criterion = nn.SmoothL1Loss()  # Changed to Smooth L1 Loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    stopper = early_stopping(patience=patience)

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for batch in train_loader:
            batch = batch[0].to(device)
            optimizer.zero_grad()
            encoded, outputs = model(batch)
            
            # Reconstruction loss
            loss = criterion(outputs, batch)
            
            # Sparsity penalty (L1 regularization on encoded activations)
            l1_penalty = torch.mean(torch.abs(encoded))
            loss += sparsity_weight * l1_penalty
            
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * batch.size(0)

        train_loss /= len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch[0].to(device)
                _, outputs = model(batch)
                loss = criterion(outputs, batch)
                val_loss += loss.item() * batch.size(0)

        val_loss /= len(val_loader.dataset)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

        # Early stopping
        stopper(val_loss)
        if stopper.early_stop:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    # Convert tensor back to DataFrame for consistency
    _, data_decoded = model(data_tensor.to(device))
    data_decoded = data_decoded.cpu().detach().numpy()
    df_encoded = pd.DataFrame(data_decoded, columns=[f'feature_{i}' for i in range(data_decoded.shape[1])])
    return df_encoded

In [6]:
# Encode time series data
train_ts_encoded = perform_autoencoder(df_train, epochs=100, batch_size=32, learning_rate=0.001, patience=10, use_pca=False, scaler_type='MinMaxScaler', sparsity_weight=1e-5)
test_ts_encoded = perform_autoencoder(df_test, epochs=100, batch_size=32, learning_rate=0.001, patience=10, use_pca=False, scaler_type='MinMaxScaler', sparsity_weight=1e-5)

train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

# Merge data
train = pd.merge(train, train_ts_encoded, how="left", on='id')
test = pd.merge(test, test_ts_encoded, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

Epoch 1, Train Loss: 0.0474, Validation Loss: 0.0384
Epoch 2, Train Loss: 0.0259, Validation Loss: 0.0193
Epoch 3, Train Loss: 0.0187, Validation Loss: 0.0183
Epoch 4, Train Loss: 0.0182, Validation Loss: 0.0181
Epoch 5, Train Loss: 0.0181, Validation Loss: 0.0180
Epoch 6, Train Loss: 0.0180, Validation Loss: 0.0180
Epoch 7, Train Loss: 0.0180, Validation Loss: 0.0180
Epoch 8, Train Loss: 0.0178, Validation Loss: 0.0177
Epoch 9, Train Loss: 0.0171, Validation Loss: 0.0161
Epoch 10, Train Loss: 0.0142, Validation Loss: 0.0124
Epoch 11, Train Loss: 0.0110, Validation Loss: 0.0094
Epoch 12, Train Loss: 0.0085, Validation Loss: 0.0075
Epoch 13, Train Loss: 0.0074, Validation Loss: 0.0067
Epoch 14, Train Loss: 0.0069, Validation Loss: 0.0063
Epoch 15, Train Loss: 0.0066, Validation Loss: 0.0061
Epoch 16, Train Loss: 0.0064, Validation Loss: 0.0060
Epoch 17, Train Loss: 0.0063, Validation Loss: 0.0059
Epoch 18, Train Loss: 0.0062, Validation Loss: 0.0057
Epoch 19, Train Loss: 0.0061, Validat

In [7]:
# Skew removal for some columns
skewed_columns = [
    'BIA-BIA_BMC', 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_Fat',
    'BIA-BIA_FFM', 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 
    'BIA-BIA_TBW', 'CGAS-CGAS_Score', 'feature_23', 'feature_35', 'feature_38', 'feature_40', 'feature_47',
    'feature_54', 'feature_66', 'feature_78', 'feature_80', 'feature_88', 'feature_90'
]
lambda_params = {}

# Define the box-cox function to remove skew
def box_cox_transform(df, column, lambda_param=None):
    # Create a copy of the dataframe
    df_copy = df.copy()
    
    # Drop NaN values for the specific column
    df_copy = df_copy.dropna(subset=[column])
    
    # Ensure all values are positive
    min_value = df_copy[column].min()
    if min_value <= 0:
        df_copy[column] = df_copy[column] - min_value + 1  # Add 1 to ensure all values are positive
    
    # Perform Box-Cox transformation
    if lambda_param is None:
        df_copy[f'{column}_boxcox'], lambda_param = stats.boxcox(df_copy[column])
        print(f"Transforming column: {column}")
        print(f"Optimal lambda for Box-Cox transformation: {lambda_param}")
    else:
        df_copy[f'{column}_boxcox'] = stats.boxcox(df_copy[column], lmbda=lambda_param)
        print(f"Applying transformation to column: {column} with lambda: {lambda_param}")
    
    print(f"Number of rows before transformation: {len(df)}")
    print(f"Number of rows after removing NaN values: {len(df_copy)}")
    
    return df_copy, lambda_param

# Apply Box-Cox transformation to train data and store lambda values
for column in skewed_columns:
    transformed_train_data, lambda_params[column] = box_cox_transform(train, column)
    # Update only the new transformed column in the original dataframe
    train[f'{column}_boxcox'] = transformed_train_data[f'{column}_boxcox']

# Apply the same transformation to test data using stored lambda values
for column in skewed_columns:
    transformed_test_data, _ = box_cox_transform(test, column, lambda_param=lambda_params[column])
    # Update only the new transformed column in the original dataframe
    test[f'{column}_boxcox'] = transformed_test_data[f'{column}_boxcox']

# Function to handle infinite values
def replace_inf_with_max(df):
    for column in df.columns:
        if df[column].dtype == 'float64':
            max_value = df[column][~np.isinf(df[column])].max()
            df[column] = df[column].replace([np.inf, -np.inf], max_value)
    return df

# Replace infinite values with the maximum non-infinite value in each column
train_data = replace_inf_with_max(train)
test_data = replace_inf_with_max(test)

Transforming column: BIA-BIA_BMC
Optimal lambda for Box-Cox transformation: -0.26544288750244394
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_BMR
Optimal lambda for Box-Cox transformation: -2.024016452566404
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_DEE
Optimal lambda for Box-Cox transformation: -0.9862196352522961
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_ECW
Optimal lambda for Box-Cox transformation: -0.11312798067663181
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_Fat
Optimal lambda for Box-Cox transformation: 27.718481796974547
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_FFM
Optimal lambda for Box-Cox transforma

In [8]:
train.head(10)

Unnamed: 0,Basic_Demos-Enroll_Season,Basic_Demos-Age,Basic_Demos-Sex,CGAS-Season,CGAS-CGAS_Score,Physical-Season,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Season,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-Season,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-Season,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-Season,PAQ_A-PAQ_A_Total,PAQ_C-Season,PAQ_C-PAQ_C_Total,PCIAT-Season,PCIAT-PCIAT_01,PCIAT-PCIAT_02,PCIAT-PCIAT_03,PCIAT-PCIAT_04,PCIAT-PCIAT_05,PCIAT-PCIAT_06,PCIAT-PCIAT_07,PCIAT-PCIAT_08,PCIAT-PCIAT_09,PCIAT-PCIAT_10,PCIAT-PCIAT_11,PCIAT-PCIAT_12,PCIAT-PCIAT_13,PCIAT-PCIAT_14,PCIAT-PCIAT_15,PCIAT-PCIAT_16,PCIAT-PCIAT_17,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-Season,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-Season,PreInt_EduHx-computerinternet_hoursday,sii,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,feature_79,feature_80,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_88,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,BIA-BIA_BMC_boxcox,BIA-BIA_BMR_boxcox,BIA-BIA_DEE_boxcox,BIA-BIA_ECW_boxcox,BIA-BIA_Fat_boxcox,BIA-BIA_FFM_boxcox,BIA-BIA_FFMI_boxcox,BIA-BIA_FMI_boxcox,BIA-BIA_ICW_boxcox,BIA-BIA_LDM_boxcox,BIA-BIA_LST_boxcox,BIA-BIA_TBW_boxcox,CGAS-CGAS_Score_boxcox,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox
0,Fall,5,0,Winter,51.0,Fall,16.877316,46.0,50.8,,,,,,,,,Fall,0.0,0.0,,,,,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,Fall,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,,,,,Fall,5.0,4.0,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,55.0,,,,Fall,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.795346,0.494067,1.013221,1.877806,6.776256e+107,1.210694,0.378516,13083250000000.0,0.836481,1.443716,1.461227,1.296982,2.887303,,,,,,,,,,,
1,Summer,9,0,,,Fall,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,,,,,Fall,3.0,0.0,,,,,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,Winter,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,,,Fall,2.34,Fall,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fall,46.0,64.0,Summer,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.791257,0.494067,1.013225,1.624546,6.664675e+107,1.211268,0.378436,12361790000000.0,0.832549,1.635205,1.462669,1.279664,,,,,,,,,,,,
2,Summer,10,1,Fall,71.0,Fall,16.648696,56.5,75.6,,65.0,94.0,117.0,Fall,5.0,7.0,33.0,Fall,20.0,1.0,10.2,1.0,14.7,2.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,,,,,,,,,,,,,,,,,,,,Summer,2.17,Fall,5.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,2.0,1.0,1.0,28.0,Fall,38.0,54.0,Summer,2.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.054867,,,,,,,,,,,
3,Winter,9,0,Fall,71.0,Summer,18.292347,56.0,81.6,,60.0,97.0,117.0,Summer,6.0,9.0,37.0,Summer,18.0,1.0,,,,,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,Summer,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,,,Winter,2.451,Summer,4.0,2.0,4.0,0.0,5.0,1.0,0.0,3.0,2.0,2.0,3.0,0.0,3.0,0.0,0.0,3.0,4.0,3.0,4.0,1.0,44.0,Summer,31.0,45.0,Winter,0.0,1.0,0.042645,0.042676,0.043242,0.045038,0.048476,0.045835,0.043521,0.041334,0.039622,0.044423,0.039155,0.042511,0.278738,0.456723,0.457681,0.26365,0.458967,0.000421,0.138588,0.721517,0.54193,0.618317,0.678421,0.208737,0.632449,0.622641,0.540742,0.252408,0.433229,0.001938,0.266193,0.420612,0.50821,0.614776,0.240254,0.211696,0.812929,0.654278,0.997339,3.386895e-07,0.02447,9.9e-05,0.000105,0.638357,0.00797,0.007559,0.480474,0.197097,0.183379,0.338868,0.199188,0.141434,0.261364,1.219247e-09,0.012135,0.65364,0.491204,0.363322,0.558674,0.193358,0.228993,0.514928,0.409082,0.324099,0.432641,1.338952e-08,0.073442,0.697641,0.632515,0.547483,0.720847,0.196587,0.31049,0.639856,0.63509,0.33663,0.608148,6.690705e-07,0.040369,0.811975,0.641022,0.745902,0.793446,0.203815,0.118262,0.092478,0.138501,0.23968,0.998275,0.000381,0.088301,0.080722,0.987448,0.998209,0.839868,0.221044,1.845723,0.494067,1.013388,2.360996,6.985483e+107,1.229847,0.378534,13552920000000.0,0.841114,1.671884,1.5003,1.322946,3.054867,-1.268304,-3.337948,-0.002456,-4.426193,-3.088532,-6.034088,-8.260575,-3.676821,-0.395425,-0.001557,-0.777888
4,Spring,18,1,Summer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Summer,1.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,Spring,13,1,Winter,50.0,Summer,22.279952,59.5,112.2,,60.0,73.0,102.0,,,,,Summer,12.0,0.0,16.5,2.0,17.9,2.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,Summer,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,,,Spring,4.11,Summer,3.0,3.0,3.0,0.0,2.0,1.0,0.0,2.0,2.0,1.0,0.0,1.0,3.0,3.0,2.0,1.0,3.0,1.0,2.0,1.0,34.0,Summer,40.0,56.0,Spring,0.0,1.0,0.498981,0.507874,0.506758,0.516595,0.526931,0.518792,0.512327,0.522206,0.500484,0.50464,0.513426,0.506268,0.524493,0.464179,0.183297,0.076892,0.201258,0.648223,0.055414,0.379109,0.280706,0.6212,0.70098,0.231246,0.393119,0.443718,0.624326,0.129813,0.559059,0.973229,0.129856,0.658982,0.736637,0.76792,0.168795,0.134178,0.789989,0.64851,0.994572,0.0004414713,0.033648,2.1e-05,5.4e-05,0.027227,2.2e-05,0.003486,0.533289,0.232812,0.642319,0.433224,0.013609,0.010706,0.016909,0.02486973,0.023024,0.343555,0.179798,0.221327,0.612637,0.226854,0.526252,0.476078,0.053047,0.045556,0.077106,0.9906693,0.048741,0.27965,0.422265,0.529694,0.709783,0.219557,0.369415,0.514185,0.289574,0.059921,0.321687,0.9999974,0.033877,0.431582,0.578981,0.856462,0.749528,0.219658,0.130645,0.158716,0.168184,0.249119,0.999158,1.0,0.090144,0.079093,0.999967,0.999794,0.839888,0.230974,1.864977,0.494067,1.013409,2.828091,8.156581e+107,1.240129,0.378661,17846420000000.0,0.842524,1.738032,1.523098,1.342026,2.876979,-1.201298,-5.561385,-0.004617,-3.983888,-2.581788,-4.919829,-11.762419,-3.907207,-0.473372,-0.000801,-0.776802
6,Fall,10,0,,,Fall,19.66076,55.0,84.6,,123.0,83.0,163.0,,,,,Fall,9.0,1.0,,,,,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,Fall,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,,,Winter,3.67,Winter,1.0,4.0,1.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1.0,4.0,1.0,0.0,20.0,Winter,27.0,40.0,Fall,3.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.843325,0.494067,1.013354,2.394666,7.041561e+107,1.230141,0.378572,13861210000000.0,0.841408,1.65729,1.501039,1.324696,,,,,,,,,,,,
7,Fall,10,1,,,Fall,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,,,,,Fall,0.0,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,Fall,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,,,Fall,1.27,,,,,,,,,,,,,,,,,,,,,,,,,,Fall,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.854326,0.494067,1.013377,2.606525,6.92878e+107,1.232877,0.378501,13160320000000.0,0.83989,1.684591,1.506823,1.328996,,,,,,,,,,,,
8,Summer,15,0,,,Spring,,,,,,,,,,,,Spring,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Summer,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,Summer,19,1,Summer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# Feature engineering
def feature_engineering(df):     
    # Combine all grip strength
    df['FGC-FGC_GS'] = df['FGC-FGC_GSD_Zone'] + df['FGC-FGC_GSND_Zone']
    
    # Combine all sit and reach
    df['FGC-FGC_SR'] = df['FGC-FGC_SRL_Zone'] + df['FGC-FGC_SRR_Zone']
    
    # Create a fitness score by adding the zone fitness data
    df['fitness_score'] = df['FGC-FGC_GS'] + df['FGC-FGC_SR'] + df['FGC-FGC_CU_Zone'] + df['FGC-FGC_PU_Zone'] + df['FGC-FGC_TL_Zone']
    
    # Combine PAQ_A-PAQ_A_Total and PAQ_C-PAQ_C_Total into one column
    df['PAQ_Total'] = df['PAQ_A-PAQ_A_Total'].combine_first(df['PAQ_C-PAQ_C_Total'])
    
    # Combine up to stat 11 of actigraphy stats
    df['combined_actigraphy_stat'] = df['feature_0'] + df['feature_1'] + df['feature_2'] + df['feature_3'] + df['feature_4'] + df['feature_5'] + df['feature_6'] + df['feature_7'] + df['feature_8']+ df['feature_9'] + df['feature_10'] + df['feature_11']
    
    # Reworking of features from other notebook
    df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']
    
    df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']
    df['Age_Internet_Hours'] = df['PreInt_EduHx-computerinternet_hoursday'] / df['Basic_Demos-Age']
    df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']
    df['BIA_BMI_Internet_Hours_Age'] = (df['BIA-BIA_BMI'] * df['PreInt_EduHx-computerinternet_hoursday']) / df['Basic_Demos-Age']
    
    df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']
    df['BFP_BMI_Age'] = df['BIA-BIA_Fat'] / (df['BIA-BIA_BMI']*df['BMI_Age'])
    
    df['FFMI_BFP'] = df['BIA-BIA_FFMI_boxcox'] / df['BIA-BIA_Fat']
    df['FMI_BFP'] = df['BIA-BIA_FMI_boxcox'] / df['BIA-BIA_Fat']
    df['LST_TBW'] = df['BIA-BIA_LST_boxcox'] / df['BIA-BIA_TBW_boxcox']
    df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR_boxcox']
    df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE_boxcox']
   
    df['BMR_Weight'] = df['BIA-BIA_BMR_boxcox'] / df['Physical-Weight']
    df['DEE_Weight'] = df['BIA-BIA_DEE_boxcox'] / df['Physical-Weight']
    df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']
    df['BMR_BMI'] = df['BIA-BIA_BMR_boxcox'] / df['Physical-BMI']
    df['DEE_BMI'] = df['BIA-BIA_DEE_boxcox'] / df['Physical-BMI']
    df['SMM_BMI'] = df['BIA-BIA_SMM'] / df['Physical-BMI']
    
    df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI_boxcox']
    df['Hydration_Status'] = df['BIA-BIA_TBW_boxcox'] / df['Physical-Weight']
    df['ICW_TBW'] = df['BIA-BIA_ICW_boxcox'] / df['BIA-BIA_TBW_boxcox']
    
    df['Age_Weight'] = df['Basic_Demos-Age'] * df['Physical-Weight']
    df['Age_Weight_BMI'] = (df['Basic_Demos-Age'] * df['Physical-Weight']) / df['Physical-BMI']
    df['Sex_BMI'] = df['Basic_Demos-Sex'] * df['Physical-BMI']
    df['Sex_HeartRate'] = df['Basic_Demos-Sex'] * df['Physical-HeartRate']
    
    df['Age_WaistCirc'] = df['Basic_Demos-Age'] * df['Physical-Waist_Circumference']
    df['Age_WaistCirc_BMI'] = (df['Basic_Demos-Age'] * df['Physical-Waist_Circumference']) / df['Physical-BMI']
    
    df['BMI_FitnessMaxStage'] = df['Physical-BMI'] * df['Fitness_Endurance-Max_Stage']
    df['Weight_GripStrengthDominant'] = df['Physical-Weight'] * df['FGC-FGC_GSD']
    df['Weight_GripStrengthNonDominant'] = df['Physical-Weight'] * df['FGC-FGC_GSND']
    df['HeartRate_FitnessTime'] = df['Physical-HeartRate'] * (df['Fitness_Endurance-Time_Mins'] + df['Fitness_Endurance-Time_Sec'])
    df['Age_PushUp'] = df['Basic_Demos-Age'] * df['FGC-FGC_PU']
    df['FFMI_Age'] = df['BIA-BIA_FFMI_boxcox'] * df['Basic_Demos-Age']
    df['InternetUse_SleepDisturbance'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['SDS-SDS_Total_Raw']
    df['InternetUse_SleepDisturbance_BMI'] = (df['PreInt_EduHx-computerinternet_hoursday'] * df['SDS-SDS_Total_Raw']) / df['Physical-BMI']
    df['CGAS_BMI'] = df['CGAS-CGAS_Score'] * df['Physical-BMI']
    df['CGAS_FitnessMaxStage'] = df['CGAS-CGAS_Score'] * df['Fitness_Endurance-Max_Stage']
    
    return df

In [11]:
# Enumerate category columns
cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df
        
train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train)
    mappingTe = create_mapping(col, test)
    
    train[col] = train[col].replace(mapping).astype(int)
    test[col] = test[col].replace(mappingTe).astype(int)

In [12]:
imputer = KNNImputer(n_neighbors=5)
numeric_cols = train.select_dtypes(include=['float64', 'int64']).columns
imputed_data = imputer.fit_transform(train[numeric_cols])
train_imputed = pd.DataFrame(imputed_data, columns=numeric_cols)
train_imputed['sii'] = train_imputed['sii'].round().astype(int)
for col in train.columns:
    if col not in numeric_cols:
        train_imputed[col] = train[col]
        
train = train_imputed

train = feature_engineering(train)
train = train.dropna(thresh=10, axis=0)
test = feature_engineering(test)

In [13]:
train.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_C-PAQ_C_Total,PCIAT-PCIAT_01,PCIAT-PCIAT_02,PCIAT-PCIAT_03,PCIAT-PCIAT_04,PCIAT-PCIAT_05,PCIAT-PCIAT_06,PCIAT-PCIAT_07,PCIAT-PCIAT_08,PCIAT-PCIAT_09,PCIAT-PCIAT_10,PCIAT-PCIAT_11,PCIAT-PCIAT_12,PCIAT-PCIAT_13,PCIAT-PCIAT_14,PCIAT-PCIAT_15,PCIAT-PCIAT_16,PCIAT-PCIAT_17,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,sii,BIA-BIA_BMC_boxcox,BIA-BIA_BMR_boxcox,BIA-BIA_DEE_boxcox,BIA-BIA_ECW_boxcox,BIA-BIA_Fat_boxcox,BIA-BIA_FFM_boxcox,BIA-BIA_FFMI_boxcox,BIA-BIA_FMI_boxcox,BIA-BIA_ICW_boxcox,BIA-BIA_LDM_boxcox,BIA-BIA_LST_boxcox,BIA-BIA_TBW_boxcox,CGAS-CGAS_Score_boxcox,Basic_Demos-Enroll_Season,CGAS-Season,Physical-Season,Fitness_Endurance-Season,FGC-Season,BIA-Season,PAQ_A-Season,PAQ_C-Season,PCIAT-Season,SDS-Season,PreInt_EduHx-Season,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,feature_79,feature_80,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_88,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox,FGC-FGC_GS,FGC-FGC_SR,fitness_score,PAQ_Total,combined_actigraphy_stat,BMI_Age,Internet_Hours_Age,Age_Internet_Hours,BMI_Internet_Hours,BIA_BMI_Internet_Hours_Age,BFP_BMI,BFP_BMI_Age,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,BMR_BMI,DEE_BMI,SMM_BMI,Muscle_to_Fat,Hydration_Status,ICW_TBW,Age_Weight,Age_Weight_BMI,Sex_BMI,Sex_HeartRate,Age_WaistCirc,Age_WaistCirc_BMI,BMI_FitnessMaxStage,Weight_GripStrengthDominant,Weight_GripStrengthNonDominant,HeartRate_FitnessTime,Age_PushUp,FFMI_Age,InternetUse_SleepDisturbance,InternetUse_SleepDisturbance_BMI,CGAS_BMI,CGAS_FitnessMaxStage
0,5.0,0.0,51.0,16.877316,46.0,50.8,28.0,68.6,83.8,111.6,4.2,4.8,39.2,0.0,0.0,18.58,2.6,29.06,1.6,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,3.1638,2.5882,5.0,4.0,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,55.0,42.0,56.6,3.0,2,1.795346,0.494067,1.013221,1.877806,6.776256e+107,1.210694,0.378516,13083250000000.0,0.836481,1.443716,1.461227,1.296982,2.887303,0,0,0,0,0,0,0,0,Fall,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.2,0.0,5.2,3.1638,,84.386578,15.0,0.6,50.631947,10.12752,0.545865,0.006469,0.041082,1419967000000.0,1.126637,4.552216,9.335588,0.009726,0.019945,0.424811,0.029274,0.060035,1.157844,1.493612e-12,0.025531,0.644944,254.0,15.049787,0.0,0.0,140.0,8.295158,70.884726,1476.248,943.864,3687.2,0.0,1.892581,126.0,7.465642,860.7431,214.2
1,9.0,0.0,67.2,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,4.2,4.8,39.2,3.0,0.0,22.98,1.4,21.46,1.8,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,2.6258,2.34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,64.0,0.0,0,1.791257,0.494067,1.013225,1.624546,6.664675e+107,1.211268,0.378436,12361790000000.0,0.832549,1.635205,1.462669,1.279664,3.02337,1,1,0,0,0,1,0,1,Fall,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.2,2.0,5.2,2.6258,,126.320313,0.0,0.0,0.0,0.0,0.282883,0.002239,0.095303,3113133000000.0,1.14301,1.961865,4.023363,0.010741,0.022027,0.321056,0.035201,0.07219,1.097973,1.24664e-12,0.027819,0.6506,414.0,29.496444,0.0,0.0,198.0,14.106995,58.949479,987.16,1057.08,3080.0,45.0,3.405921,0.0,0.0,943.191667,282.24
2,10.0,1.0,71.0,16.648696,56.5,75.6,28.0,65.0,94.0,117.0,5.0,7.0,33.0,20.0,1.0,10.2,1.0,14.7,2.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.17,5.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,2.0,1.0,1.0,28.0,38.0,54.0,2.0,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,3.054867,1,2,0,1,0,2,0,2,Fall,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,2.0,7.0,2.028,,166.486961,20.0,0.2,33.297392,3.467064,1.008819,0.006059,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.006535,0.013405,0.576907,0.029676,0.060872,1.957827,2.46071e-12,0.017547,0.634861,756.0,45.408962,16.648696,94.0,280.0,16.818134,83.24348,1111.32,771.12,3760.0,70.0,3.78512,76.0,4.564922,1182.05742,355.0
3,9.0,0.0,71.0,18.292347,56.0,81.6,26.8,60.0,97.0,117.0,6.0,9.0,37.0,18.0,1.0,18.94,1.8,28.88,2.2,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,2.3802,2.451,4.0,2.0,4.0,0.0,5.0,1.0,0.0,3.0,2.0,2.0,3.0,0.0,3.0,0.0,0.0,3.0,4.0,3.0,4.0,1.0,44.0,31.0,45.0,0.0,1,1.845723,0.494067,1.013388,2.360996,6.985483e+107,1.229847,0.378534,13552920000000.0,0.841114,1.671884,1.5003,1.322946,3.054867,2,2,1,2,1,3,0,3,Summer,2,2,0.042645,0.042676,0.043242,0.045038,0.048476,0.045835,0.043521,0.041334,0.039622,0.044423,0.039155,0.042511,0.278738,0.456723,0.457681,0.26365,0.458967,0.000421,0.138588,0.721517,0.54193,0.618317,0.678421,0.208737,0.632449,0.622641,0.540742,0.252408,0.433229,0.001938,0.266193,0.420612,0.50821,0.614776,0.240254,0.211696,0.812929,0.654278,0.997339,3.386895e-07,0.02447,9.9e-05,0.000105,0.638357,0.00797,0.007559,0.480474,0.197097,0.183379,0.338868,0.199188,0.141434,0.261364,1.219247e-09,0.012135,0.65364,0.491204,0.363322,0.558674,0.193358,0.228993,0.514928,0.409082,0.324099,0.432641,1.338952e-08,0.073442,0.697641,0.632515,0.547483,0.720847,0.196587,0.31049,0.639856,0.63509,0.33663,0.608148,6.690705e-07,0.040369,0.811975,0.641022,0.745902,0.793446,0.203815,0.118262,0.092478,0.138501,0.23968,0.998275,0.000381,0.088301,0.080722,0.987448,0.998209,0.839868,0.221044,-1.268304,-3.337948,-0.002456,-4.426193,-3.088532,-6.034088,-8.260575,-3.676821,-0.395425,-0.001557,-0.777888,4.0,0.0,6.0,2.3802,0.518477,164.631122,0.0,0.0,0.0,0.0,1.028971,0.00625,0.020109,719969500000.0,1.13406,9.300462,19.076317,0.006055,0.012419,0.472854,0.027009,0.0554,1.447589,1.953807e-12,0.016213,0.635789,734.4,40.147937,0.0,0.0,241.2,13.185842,109.754082,2356.608,1545.504,4462.0,45.0,3.406803,0.0,0.0,1298.756633,426.0
4,18.0,1.0,70.6,16.570716,53.05,68.36,28.0,67.6,85.2,110.6,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,1.04,2.3418,1.0,1.0,1.2,0.4,0.8,0.6,0.0,0.0,0.4,0.2,0.6,0.0,0.4,0.2,1.0,0.6,0.4,0.4,0.4,0.2,13.6,33.8,48.4,1.8,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,3.0465,3,3,2,0,2,2,1,0,,0,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.8,1.0,6.2,1.04,,298.272883,32.4,0.1,29.827288,1.733532,1.008819,0.003382,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.007227,0.014825,0.614425,0.029816,0.061159,1.96704,2.46071e-12,0.019405,0.634861,1230.48,74.256298,16.570716,85.2,504.0,30.415101,79.539435,1441.0288,1417.7864,4498.56,288.0,6.813215,60.84,3.671537,1169.89253,338.88
5,13.0,1.0,50.0,22.279952,59.5,112.2,25.0,60.0,73.0,102.0,4.2,4.8,39.2,12.0,0.0,16.5,2.0,17.9,2.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,2.414,4.11,3.0,3.0,3.0,0.0,2.0,1.0,0.0,2.0,2.0,1.0,0.0,1.0,3.0,3.0,2.0,1.0,3.0,1.0,2.0,1.0,34.0,40.0,56.0,0.0,1,1.864977,0.494067,1.013409,2.828091,8.156581e+107,1.240129,0.378661,17846420000000.0,0.842524,1.738032,1.523098,1.342026,2.876979,3,0,1,0,1,3,0,4,Summer,2,4,0.498981,0.507874,0.506758,0.516595,0.526931,0.518792,0.512327,0.522206,0.500484,0.50464,0.513426,0.506268,0.524493,0.464179,0.183297,0.076892,0.201258,0.648223,0.055414,0.379109,0.280706,0.6212,0.70098,0.231246,0.393119,0.443718,0.624326,0.129813,0.559059,0.973229,0.129856,0.658982,0.736637,0.76792,0.168795,0.134178,0.789989,0.64851,0.994572,0.0004414713,0.033648,2.1e-05,5.4e-05,0.027227,2.2e-05,0.003486,0.533289,0.232812,0.642319,0.433224,0.013609,0.010706,0.016909,0.02486973,0.023024,0.343555,0.179798,0.221327,0.612637,0.226854,0.526252,0.476078,0.053047,0.045556,0.077106,0.9906693,0.048741,0.27965,0.422265,0.529694,0.709783,0.219557,0.369415,0.514185,0.289574,0.059921,0.321687,0.9999974,0.033877,0.431582,0.578981,0.856462,0.749528,0.219658,0.130645,0.158716,0.168184,0.249119,0.999158,1.0,0.090144,0.079093,0.999967,0.999794,0.839888,0.230974,-1.201298,-5.561385,-0.004617,-3.983888,-2.581788,-4.919829,-11.762419,-3.907207,-0.473372,-0.000801,-0.776802,4.0,2.0,6.0,2.414,6.135283,289.639376,0.0,0.0,0.0,0.0,2.251718,0.007774,0.005571,262557500000.0,1.134924,33.582468,68.882927,0.004403,0.009032,0.594629,0.022175,0.045485,1.587993,1.982492e-12,0.011961,0.6278,1458.6,65.466927,22.279952,73.0,325.0,14.587105,93.575798,2008.38,1851.3,3212.0,78.0,4.922599,0.0,0.0,1113.997599,210.0
6,10.0,0.0,59.2,19.66076,55.0,84.6,26.2,123.0,83.0,163.0,4.2,4.8,39.2,9.0,1.0,22.178,2.2,23.952,2.4,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,2.3802,3.67,1.0,4.0,1.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1.0,4.0,1.0,0.0,20.0,27.0,40.0,3.0,0,1.843325,0.494067,1.013354,2.394666,7.041561e+107,1.230141,0.378572,13861210000000.0,0.841408,1.65729,1.501039,1.324696,2.940062,0,1,0,0,0,0,0,3,Winter,3,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.6,2.0,8.6,2.3802,,196.607603,30.0,0.3,58.982281,5.89887,1.085954,0.005523,0.017729,649145600000.0,1.133119,10.549809,21.638152,0.00584,0.011978,0.476285,0.02513,0.051542,1.332385,1.889857e-12,0.015658,0.635171,846.0,43.029872,0.0,0.0,262.0,13.326036,82.575193,2026.3392,1876.2588,3652.0,20.0,3.78572,81.0,4.119881,1163.917012,248.64
7,10.0,1.0,60.8,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,4.2,4.8,39.2,0.0,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,1.936,1.27,3.0,2.6,3.0,1.0,3.8,0.8,1.4,1.2,1.6,1.4,2.2,0.2,2.0,1.8,1.4,2.2,3.4,1.4,2.4,1.0,36.4,42.4,66.8,2.0,1,1.854326,0.494067,1.013377,2.606525,6.92878e+107,1.232877,0.378501,13160320000000.0,0.83989,1.684591,1.506823,1.328996,2.966215,0,1,0,0,0,0,0,1,,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,0.0,3.0,1.936,,168.612865,20.0,0.2,33.722573,3.37262,0.963488,0.005714,0.023296,809995300000.0,1.133806,8.027301,16.464742,0.005868,0.012035,0.485536,0.029302,0.060101,1.706157,2.185966e-12,0.015784,0.631973,842.0,49.936878,16.861286,90.0,270.0,16.013013,70.817403,934.62,1060.92,3960.0,0.0,3.78501,84.8,5.029272,1025.166217,255.36
8,15.0,0.0,65.0,16.570716,49.4,114.8,28.0,69.8,82.4,108.2,5.6,6.6,42.4,8.8,0.2,19.62,2.4,21.2,1.8,12.2,0.8,10.9,0.8,10.4,0.8,8.9,0.8,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.436,2.3418,0.6,1.6,0.8,0.0,1.0,0.4,0.0,0.0,0.4,0.0,0.8,0.0,0.4,0.0,0.6,0.4,0.0,0.4,0.0,0.2,9.8,34.2,48.8,2.0,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,2.99102,1,1,3,0,3,2,0,0,,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.2,1.6,7.6,2.436,,248.560736,30.0,0.133333,33.141431,2.311376,1.008819,0.004059,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.004304,0.008828,0.659823,0.029816,0.061159,1.96704,2.46071e-12,0.011555,0.634861,1722.0,103.918263,0.0,0.0,420.0,25.345918,92.796008,2433.76,2252.376,4037.6,183.0,5.677679,68.4,4.127764,1077.096522,364.0
9,19.0,1.0,56.4,20.889514,53.4,118.88,28.0,71.4,85.2,109.0,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.3418,1.0,1.0,0.6,0.4,0.8,0.6,0.0,0.0,0.4,0.2,0.4,0.0,0.8,0.2,0.6,0.6,0.4,0.4,0.4,0.6,13.6,38.2,54.0,1.8,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,2.931482,1,3,2,0,2,2,0,0,,0,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.8,1.0,6.2,2.028,,396.900772,34.2,0.094737,37.601126,1.642293,1.008819,0.002542,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.004156,0.008525,0.610398,0.023651,0.048515,1.560365,2.46071e-12,0.011159,0.634861,2258.72,108.126975,20.889514,85.2,532.0,25.467323,100.269669,2505.9904,2465.5712,4498.56,304.0,7.191727,68.76,3.291604,1178.168608,270.72


In [14]:
# Export train_data to CSV
train_output_path = os.path.join(output_folder, 'train_data_imputed.csv')
train.to_csv(train_output_path, index=False)
print(f"Imputed train data exported to: {train_output_path}")

# Export test_data to CSV
test_output_path = os.path.join(output_folder, 'test_data_imputed.csv')
test.to_csv(test_output_path, index=False)
print(f"Imputed test data exported to: {test_output_path}")

# Make copies of data for other submissions
train2 = train.copy()
test2 = test.copy()

train3 = train.copy()
test3 = test.copy()

print("Data export completed.")

Imputed train data exported to: output\train_data_imputed.csv
Imputed test data exported to: output\test_data_imputed.csv
Data export completed.


In [15]:
# Submission 1
# Feature selection
# Removed stats: 'feature_92','combined_actigraphy_stat', 
time_series_cols = [
    'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4','feature_5', 'feature_6', 
    'feature_7', 'feature_8', 'feature_9','feature_10', 'feature_11',
    'feature_12', 'feature_13', 'feature_14',
    'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19',
    'feature_20', 'feature_21', 'feature_22', 'feature_24',
    'feature_25', 'feature_26', 'feature_27', 'feature_28', 'feature_29',
    'feature_30', 'feature_31', 'feature_32', 'feature_33', 'feature_34',
    'feature_36', 'feature_37', 'feature_41', 'feature_42', 'feature_39',
    'feature_43', 'feature_44',
    'feature_45', 'feature_46', 'feature_48', 'feature_49',
    'feature_50', 'feature_51', 'feature_52', 'feature_53', 
    'feature_55', 'feature_56', 'feature_57', 'feature_58', 'feature_59',
    'feature_60', 'feature_61', 'feature_62', 'feature_63', 'feature_64',
    'feature_65', 'feature_67', 'feature_68', 'feature_69',
    'feature_70', 'feature_71', 'feature_72', 'feature_73', 'feature_74',
    'feature_75', 'feature_76', 'feature_77', 'feature_79',
    'feature_81', 'feature_82', 'feature_83', 'feature_84',
    'feature_85', 'feature_86', 'feature_87', 'feature_89',
    'feature_91', 'feature_93', 'feature_94',
    'feature_95', 'feature_23_boxcox', 'feature_35_boxcox', 'feature_38_boxcox',
    'feature_40_boxcox', 'feature_47_boxcox', 'feature_54_boxcox',
    'feature_66_boxcox', 'feature_78_boxcox', 'feature_80_boxcox',
    'feature_88_boxcox', 'feature_90_boxcox', 'combined_actigraphy_stat']

# Dropped columns , 'Fitness_Endurance-Time_Sec',
featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday',  'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW',
                'sii']

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

# Dropped columns 
featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday',  'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW']

featuresCols += time_series_cols
test = test[featuresCols]

In [16]:
train.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_C-PAQ_C_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,sii,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_36,feature_37,feature_41,feature_42,feature_39,feature_43,feature_44,feature_45,feature_46,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_79,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_89,feature_91,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox,combined_actigraphy_stat
0,5.0,0.0,51.0,16.877316,46.0,50.8,28.0,68.6,83.8,111.6,4.2,4.8,39.2,0.0,0.0,18.58,2.6,29.06,1.6,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,3.1638,2.5882,42.0,56.6,3.0,84.386578,15.0,50.631947,0.545865,0.041082,1419967000000.0,1.126637,4.552216,9.335588,0.009726,0.019945,0.424811,1.493612e-12,0.025531,0.644944,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9.0,0.0,67.2,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,4.2,4.8,39.2,3.0,0.0,22.98,1.4,21.46,1.8,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,2.6258,2.34,46.0,64.0,0.0,126.320313,0.0,0.0,0.282883,0.095303,3113133000000.0,1.14301,1.961865,4.023363,0.010741,0.022027,0.321056,1.24664e-12,0.027819,0.6506,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10.0,1.0,71.0,16.648696,56.5,75.6,28.0,65.0,94.0,117.0,5.0,7.0,33.0,20.0,1.0,10.2,1.0,14.7,2.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.17,38.0,54.0,2.0,166.486961,20.0,33.297392,1.008819,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.006535,0.013405,0.576907,2.46071e-12,0.017547,0.634861,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9.0,0.0,71.0,18.292347,56.0,81.6,26.8,60.0,97.0,117.0,6.0,9.0,37.0,18.0,1.0,18.94,1.8,28.88,2.2,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,2.3802,2.451,31.0,45.0,0.0,164.631122,0.0,0.0,1.028971,0.020109,719969500000.0,1.13406,9.300462,19.076317,0.006055,0.012419,0.472854,1.953807e-12,0.016213,0.635789,1,0.042645,0.042676,0.043242,0.045038,0.048476,0.045835,0.043521,0.041334,0.039622,0.044423,0.039155,0.042511,0.278738,0.456723,0.457681,0.26365,0.458967,0.000421,0.138588,0.721517,0.54193,0.618317,0.678421,0.632449,0.622641,0.540742,0.252408,0.433229,0.001938,0.266193,0.420612,0.50821,0.614776,0.240254,0.812929,0.654278,9.9e-05,0.000105,3.386895e-07,0.638357,0.00797,0.007559,0.480474,0.183379,0.338868,0.199188,0.141434,0.261364,1.219247e-09,0.65364,0.491204,0.363322,0.558674,0.193358,0.228993,0.514928,0.409082,0.324099,0.432641,1.338952e-08,0.697641,0.632515,0.547483,0.720847,0.196587,0.31049,0.639856,0.63509,0.33663,0.608148,6.690705e-07,0.811975,0.745902,0.793446,0.203815,0.118262,0.092478,0.138501,0.23968,0.000381,0.080722,0.998209,0.839868,0.221044,-1.268304,-3.337948,-0.002456,-4.426193,-3.088532,-6.034088,-8.260575,-3.676821,-0.395425,-0.001557,-0.777888,0.518477
4,18.0,1.0,70.6,16.570716,53.05,68.36,28.0,67.6,85.2,110.6,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,1.04,2.3418,33.8,48.4,1.8,298.272883,32.4,29.827288,1.008819,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.007227,0.014825,0.614425,2.46071e-12,0.019405,0.634861,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,13.0,1.0,50.0,22.279952,59.5,112.2,25.0,60.0,73.0,102.0,4.2,4.8,39.2,12.0,0.0,16.5,2.0,17.9,2.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,2.414,4.11,40.0,56.0,0.0,289.639376,0.0,0.0,2.251718,0.005571,262557500000.0,1.134924,33.582468,68.882927,0.004403,0.009032,0.594629,1.982492e-12,0.011961,0.6278,1,0.498981,0.507874,0.506758,0.516595,0.526931,0.518792,0.512327,0.522206,0.500484,0.50464,0.513426,0.506268,0.524493,0.464179,0.183297,0.076892,0.201258,0.648223,0.055414,0.379109,0.280706,0.6212,0.70098,0.393119,0.443718,0.624326,0.129813,0.559059,0.973229,0.129856,0.658982,0.736637,0.76792,0.168795,0.789989,0.64851,2.1e-05,5.4e-05,0.0004414713,0.027227,2.2e-05,0.003486,0.533289,0.642319,0.433224,0.013609,0.010706,0.016909,0.02486973,0.343555,0.179798,0.221327,0.612637,0.226854,0.526252,0.476078,0.053047,0.045556,0.077106,0.9906693,0.27965,0.422265,0.529694,0.709783,0.219557,0.369415,0.514185,0.289574,0.059921,0.321687,0.9999974,0.431582,0.856462,0.749528,0.219658,0.130645,0.158716,0.168184,0.249119,1.0,0.079093,0.999794,0.839888,0.230974,-1.201298,-5.561385,-0.004617,-3.983888,-2.581788,-4.919829,-11.762419,-3.907207,-0.473372,-0.000801,-0.776802,6.135283
6,10.0,0.0,59.2,19.66076,55.0,84.6,26.2,123.0,83.0,163.0,4.2,4.8,39.2,9.0,1.0,22.178,2.2,23.952,2.4,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,2.3802,3.67,27.0,40.0,3.0,196.607603,30.0,58.982281,1.085954,0.017729,649145600000.0,1.133119,10.549809,21.638152,0.00584,0.011978,0.476285,1.889857e-12,0.015658,0.635171,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,10.0,1.0,60.8,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,4.2,4.8,39.2,0.0,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,1.936,1.27,42.4,66.8,2.0,168.612865,20.0,33.722573,0.963488,0.023296,809995300000.0,1.133806,8.027301,16.464742,0.005868,0.012035,0.485536,2.185966e-12,0.015784,0.631973,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,15.0,0.0,65.0,16.570716,49.4,114.8,28.0,69.8,82.4,108.2,5.6,6.6,42.4,8.8,0.2,19.62,2.4,21.2,1.8,12.2,0.8,10.9,0.8,10.4,0.8,8.9,0.8,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.436,2.3418,34.2,48.8,2.0,248.560736,30.0,33.141431,1.008819,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.004304,0.008828,0.659823,2.46071e-12,0.011555,0.634861,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,19.0,1.0,56.4,20.889514,53.4,118.88,28.0,71.4,85.2,109.0,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.3418,38.2,54.0,1.8,396.900772,34.2,37.601126,1.008819,0.021644,757441000000.0,1.135335,8.640341,17.723324,0.004156,0.008525,0.610398,2.46071e-12,0.011159,0.634861,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [17]:
test.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_C-PAQ_C_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_36,feature_37,feature_41,feature_42,feature_39,feature_43,feature_44,feature_45,feature_46,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_79,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_89,feature_91,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox,combined_actigraphy_stat
0,5,0,51.0,16.877316,46.0,50.8,,,,,,,,0.0,0.0,,,,,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,,,,,3.0,84.386578,15.0,50.631947,0.545865,0.041082,15.605412,1.126637,4.552216,9.335588,0.009726,0.019945,0.424811,0.135907,0.025531,0.644944,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9,0,,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,,,,3.0,0.0,,,,,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,,2.34,46.0,64.0,0.0,126.320313,0.0,0.0,0.282883,0.095303,0.091406,1.14301,1.961865,4.023363,0.010741,0.022027,0.321056,42.458431,0.027819,0.6506,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10,1,71.0,16.648696,56.5,75.6,,65.0,94.0,117.0,5.0,7.0,33.0,20.0,1.0,10.2,1.0,14.7,2.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,,,,,,,,,,,,,,,,,,2.17,38.0,54.0,2.0,166.486961,20.0,33.297392,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9,0,71.0,18.292347,56.0,81.6,,60.0,97.0,117.0,6.0,9.0,37.0,18.0,1.0,,,,,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,,2.451,31.0,45.0,0.0,164.631122,0.0,0.0,1.028971,0.020109,53.329407,1.13406,9.300462,19.076317,0.006055,0.012419,0.472854,0.026377,0.016213,0.635789,0.481123,0.540605,0.493155,0.514614,0.503751,0.485609,0.482426,0.509025,0.460082,0.49574,0.503157,0.511096,0.492366,0.530017,0.531075,0.495958,0.507109,0.49327,0.55081,0.497195,0.505827,0.534937,0.514518,0.527145,0.540329,0.44603,0.527444,0.484028,0.498141,0.508666,0.470087,0.468009,0.476987,0.498891,0.50317,0.493456,0.456037,0.470231,0.449861,0.490415,0.521358,0.483599,0.474641,0.471748,0.4822,0.549858,0.500101,0.523254,0.509095,0.49235,0.515676,0.532098,0.437587,0.475082,0.477448,0.534927,0.504966,0.536757,0.538274,0.488032,0.524308,0.555413,0.490633,0.445727,0.506912,0.513041,0.502835,0.535134,0.540278,0.51511,0.458379,0.539643,0.533299,0.475032,0.49486,0.540515,0.435022,0.510052,0.499791,0.437043,0.495235,0.457943,0.424259,0.451284,-0.70872,-1.088454,-0.016078,-0.765804,-0.843812,-0.642705,-0.872484,-0.659838,-0.559565,-0.008216,-0.457476,5.980383
4,18,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,13,1,50.0,22.279952,59.5,112.2,,60.0,73.0,102.0,,,,12.0,0.0,16.5,2.0,17.9,2.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,,4.11,40.0,56.0,0.0,289.639376,0.0,0.0,2.251718,0.005571,16769.578841,1.134924,33.582468,68.882927,0.004403,0.009032,0.594629,3.1e-05,0.011961,0.6278,0.481831,0.537926,0.492185,0.515262,0.501318,0.484668,0.483908,0.508913,0.462032,0.496929,0.503748,0.51037,0.493528,0.532014,0.529658,0.496748,0.50821,0.494717,0.550257,0.49498,0.502285,0.534633,0.518532,0.527507,0.539274,0.446454,0.526839,0.485314,0.500821,0.506617,0.470089,0.470423,0.477164,0.499826,0.50389,0.493619,0.457166,0.469156,0.453805,0.488554,0.521235,0.485616,0.474009,0.471821,0.483248,0.547047,0.499969,0.519361,0.50909,0.489347,0.517165,0.530486,0.43833,0.477049,0.47757,0.5338,0.50549,0.535093,0.53488,0.487729,0.524741,0.553389,0.490608,0.447126,0.510058,0.512269,0.500627,0.536451,0.539742,0.514463,0.459608,0.539096,0.530739,0.478189,0.49538,0.5399,0.439219,0.512317,0.497842,0.43982,0.496052,0.461386,0.42349,0.454268,-0.705567,-1.085869,-0.016078,-0.763114,-0.844117,-0.647135,-0.864484,-0.663209,-0.55789,-0.008216,-0.459775,5.979089
6,10,0,,19.66076,55.0,84.6,,123.0,83.0,163.0,,,,9.0,1.0,,,,,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,,3.67,27.0,40.0,3.0,196.607603,30.0,58.982281,1.085954,0.017729,125.357983,1.133119,10.549809,21.638152,0.00584,0.011978,0.476285,0.009786,0.015658,0.635171,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,10,1,,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,,,,0.0,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,,1.27,,,2.0,168.612865,20.0,33.722573,0.963488,0.023296,12.803665,1.133806,8.027301,16.464742,0.005868,0.012035,0.485536,0.13829,0.015784,0.631973,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,15,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,30.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,19,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [18]:
# Replace inf
if np.any(np.isinf(train)):
    train = train.replace([np.inf, -np.inf], np.nan)

In [19]:
# Function definitions
def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

In [20]:
# Model parameters from hyper-tuning
SEED = 42
n_splits = 5

Params = {
    'n_estimators': 682,
    'learning_rate': 0.022201704131134002,
    'max_depth': 3,
    'num_leaves': 843,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.8918812900108436,
    'bagging_freq': 10,
    'lambda_l1': 4.79779460021304e-07,
    'lambda_l2': 2.0055171376757653e-06,
    'min_child_samples': 39,
    'colsample_bytree': 0.9264391369678474
}


XGB_Params = {
    'learning_rate': 0.02829495436971426,
    'max_depth': 8,
    'n_estimators': 484,
    'subsample': 0.9834706801888403,
    'colsample_bytree': 0.7681852816292032,
    'reg_alpha': 0.010495697417466835,
    'reg_lambda': 0.022138771647168275,
    'random_state': SEED,
    'tree_method': 'exact',
    'min_child_weight': 9
}


CatBoost_Params = {
    'learning_rate': 0.09981560408272906,
    'depth': 6,
    'iterations': 43,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': 3.4155853181132496,
    'bootstrap_type': 'MVS',
    'random_strength': 0.06930624569348895
}

# Create model instances
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])

In [21]:
Submission1 = TrainML(voting_model, test)

# Save submission
Submission1.to_csv('submission_1.csv', index=False)

Training Folds: 100%|██████████| 5/5 [01:00<00:00, 12.06s/it]

Mean Train QWK --> 0.7815
Mean Validation QWK ---> 0.4322





----> || Optimized QWK SCORE :: [36m[1m 0.490[0m


In [22]:
Submission1

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,1
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,1
7,0068a485,0
8,0069fbed,0
9,0083e397,0


In [23]:
# Submission #2
# Removed  
time_series_cols = [
    'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4','feature_5', 'feature_6', 
    'feature_7', 'feature_8', 'feature_9','feature_10', 'feature_11',
    'feature_12', 'feature_13', 'feature_14',
    'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19',
    'feature_20', 'feature_21', 'feature_22', 'feature_24',
    'feature_25', 'feature_26', 'feature_27', 'feature_28', 'feature_29',
    'feature_30', 'feature_31', 'feature_32', 'feature_33', 'feature_34',
    'feature_36', 'feature_37', 'feature_41', 'feature_42', 'feature_39',
    'feature_43', 'feature_44',
    'feature_45', 'feature_46', 'feature_48', 'feature_49',
    'feature_50', 'feature_51', 'feature_52', 'feature_53', 
    'feature_55', 'feature_56', 'feature_57', 'feature_58', 'feature_59',
    'feature_60', 'feature_61', 'feature_62', 'feature_63', 'feature_64',
    'feature_65', 'feature_67', 'feature_68', 'feature_69',
    'feature_70', 'feature_71', 'feature_72', 'feature_73', 'feature_74',
    'feature_75', 'feature_76', 'feature_77', 'feature_79',
    'feature_81', 'feature_82', 'feature_83', 'feature_84',
    'feature_85', 'feature_86', 'feature_87', 'feature_89',
    'feature_91', 'feature_93', 'feature_94',
    'feature_95', 'feature_23_boxcox', 'feature_35_boxcox', 'feature_38_boxcox',
    'feature_40_boxcox', 'feature_47_boxcox', 'feature_54_boxcox',
    'feature_66_boxcox', 'feature_78_boxcox', 'feature_80_boxcox',
    'feature_88_boxcox', 'feature_90_boxcox', 'combined_actigraphy_stat'
]

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

featuresCols += time_series_cols

train2 = train2[featuresCols]
train2 = train2.dropna(subset='sii')

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday']

featuresCols += time_series_cols
test2 = test2[featuresCols]

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df
        
train2 = update(train2)
test2 = update(test2)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train2)
    mappingTe = create_mapping(col, test2)
    
    train2[col] = train2[col].replace(mapping).astype(int)
    test2[col] = test2[col].replace(mappingTe).astype(int)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train2.drop(['sii'], axis=1)
    y = train2['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train2_S = []
    test2_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train2_S.append(train_kappa)
        test2_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train2_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test2_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

# Model parameters for LightGBM
Params = {
    'n_estimators': 682,
    'learning_rate': 0.022201704131134002,
    'max_depth': 3,
    'num_leaves': 843,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.8918812900108436,
    'bagging_freq': 10,
    'lambda_l1': 4.79779460021304e-07,
    'lambda_l2': 2.0055171376757653e-06,
    'min_child_samples': 39,
    'colsample_bytree': 0.9264391369678474
}


# XGBoost parameters
XGB_Params = {
    'learning_rate': 0.02829495436971426,
    'max_depth': 8,
    'n_estimators': 484,
    'subsample': 0.9834706801888403,
    'colsample_bytree': 0.7681852816292032,
    'reg_alpha': 0.010495697417466835,
    'reg_lambda': 0.022138771647168275,
    'random_state': SEED,
    'tree_method': 'exact',
    'min_child_weight': 9
}


CatBoost_Params = {
    'learning_rate': 0.09981560408272906,
    'depth': 6,
    'iterations': 43,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': 3.4155853181132496,
    'bootstrap_type': 'MVS',
    'random_strength': 0.06930624569348895
}

# Create model instances
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])

# Train the ensemble model
Submission2 = TrainML(voting_model, test2)

# Save submission
Submission2.to_csv('submission_2.csv', index=False)
Submission2

Training Folds: 100%|██████████| 5/5 [01:21<00:00, 16.26s/it]

Mean Train QWK --> 0.7850
Mean Validation QWK ---> 0.4376
----> || Optimized QWK SCORE :: [36m[1m 0.486[0m





Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,0
6,0038ba98,1
7,0068a485,0
8,0069fbed,0
9,0083e397,0


In [24]:
# Submission 3
time_series_cols = [
    'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4','feature_5', 'feature_6', 
    'feature_7', 'feature_8', 'feature_9','feature_10', 'feature_11',
    'feature_12', 'feature_13', 'feature_14',
    'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19',
    'feature_20', 'feature_21', 'feature_22', 'feature_24',
    'feature_25', 'feature_26', 'feature_27', 'feature_28', 'feature_29',
    'feature_30', 'feature_31', 'feature_32', 'feature_33', 'feature_34',
    'feature_36', 'feature_37', 'feature_41', 'feature_42', 'feature_39',
    'feature_43', 'feature_44',
    'feature_45', 'feature_46', 'feature_48', 'feature_49',
    'feature_50', 'feature_51', 'feature_52', 'feature_53', 
    'feature_55', 'feature_56', 'feature_57', 'feature_58', 'feature_59',
    'feature_60', 'feature_61', 'feature_62', 'feature_63', 'feature_64',
    'feature_65', 'feature_67', 'feature_68', 'feature_69',
    'feature_70', 'feature_71', 'feature_72', 'feature_73', 'feature_74',
    'feature_75', 'feature_76', 'feature_77', 'feature_79',
    'feature_81', 'feature_82', 'feature_83', 'feature_84',
    'feature_85', 'feature_86', 'feature_87', 'feature_89',
    'feature_91', 'feature_93', 'feature_94',
    'feature_95', 'feature_23_boxcox', 'feature_35_boxcox', 'feature_38_boxcox',
    'feature_40_boxcox', 'feature_47_boxcox', 'feature_54_boxcox',
    'feature_66_boxcox', 'feature_78_boxcox', 'feature_80_boxcox',
    'feature_88_boxcox', 'feature_90_boxcox', 'combined_actigraphy_stat'
]

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

featuresCols += time_series_cols

train3 = train3[featuresCols]
train3 = train3.dropna(subset='sii')

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday']

featuresCols += time_series_cols
test3 = test3[featuresCols]

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train3.drop(['sii'], axis=1)
    y = train3['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tp_rounded = threshold_Rounder(tpm, KappaOPtimizer.x)

    return tp_rounded

imputer = SimpleImputer(strategy='median')

ensemble = VotingRegressor(estimators=[
    ('lgb', Pipeline(steps=[('imputer', imputer), ('regressor', LGBMRegressor(random_state=SEED))])),
    ('xgb', Pipeline(steps=[('imputer', imputer), ('regressor', XGBRegressor(random_state=SEED))])),
    ('cat', Pipeline(steps=[('imputer', imputer), ('regressor', CatBoostRegressor(random_state=SEED, silent=True))])),
    ('rf', Pipeline(steps=[('imputer', imputer), ('regressor', RandomForestRegressor(random_state=SEED))])),
    ('gb', Pipeline(steps=[('imputer', imputer), ('regressor', GradientBoostingRegressor(random_state=SEED))]))
])

Submission3 = TrainML(ensemble, test3)

Training Folds: 100%|██████████| 5/5 [04:06<00:00, 49.25s/it]

Mean Train QWK --> 0.9260
Mean Validation QWK ---> 0.4429
----> || Optimized QWK SCORE :: [36m[1m 0.489[0m





In [25]:
Submission3 = pd.DataFrame({
    'id': sample['id'],
    'sii': Submission3
})

Submission3

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,1
4,0016bb22,0
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,0
9,0083e397,0


In [26]:
sub1 = Submission1
sub2 = Submission2
sub3 = Submission3

sub1 = sub1.sort_values(by='id').reset_index(drop=True)
sub2 = sub2.sort_values(by='id').reset_index(drop=True)
sub3 = sub3.sort_values(by='id').reset_index(drop=True)

combined = pd.DataFrame({
    'id': sub1['id'],
    'sii_3': sub3['sii'],
    'sii_1': sub1['sii'],
    'sii_2': sub2['sii']
    
})

def majority_vote(row):
    return row.mode()[0]

combined['final_sii'] = combined[['sii_1', 'sii_2', 'sii_3']].apply(majority_vote, axis=1)

final_submission = combined[['id', 'final_sii']].rename(columns={'final_sii': 'sii'})

final_submission.to_csv('submission.csv', index=False)

print("Majority voting completed and saved to 'Final_Submission.csv'")

Majority voting completed and saved to 'Final_Submission.csv'


In [27]:
final_submission

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,1
6,0038ba98,1
7,0068a485,0
8,0069fbed,0
9,0083e397,0
