In [1]:
import numpy as np
import pandas as pd
import os
import re
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from colorama import Fore, Style
from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline
from scipy import stats

In [2]:
# Create output folders
output_folder = 'output'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Create separate analysis output folders
analysis_output_folder = 'analysis_output'
os.makedirs(analysis_output_folder, exist_ok=True)

physical_analysis_output_folder = 'analysis_output/physical'
os.makedirs(physical_analysis_output_folder, exist_ok=True)

fitness_analysis_output_folder = 'analysis_output/fitness'
os.makedirs(fitness_analysis_output_folder, exist_ok=True)

bia_analysis_output_folder = 'analysis_output/bia'
os.makedirs(bia_analysis_output_folder, exist_ok=True)

child_info_analysis_output_folder = 'analysis_output/child_info'
os.makedirs(child_info_analysis_output_folder, exist_ok=True)

actigraphy_analysis_output_folder = 'analysis_output/actigraphy'
os.makedirs(actigraphy_analysis_output_folder, exist_ok=True)

# Set display all columns in dataframes property
pd.options.display.max_columns = None

# Supress warnings
warnings.filterwarnings('ignore')

In [3]:
# Load and process data files
def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

# Load time series data
def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df

In [4]:
# Load data
train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')
sample = pd.read_csv('input/sample_submission.csv')

train_ts = load_time_series("input/series_train.parquet")
test_ts = load_time_series("input/series_test.parquet")

df_train = train_ts.drop('id', axis=1)
df_test = test_ts.drop('id', axis=1)

100%|██████████| 996/996 [01:04<00:00, 15.36it/s]
100%|██████████| 2/2 [00:00<00:00,  8.24it/s]


In [5]:
# Sparse Autoencoder Model
class SparseAutoencoder(nn.Module):
    def __init__(self, input_dim, sparsity_weight=1e-5):
        super(SparseAutoencoder, self).__init__()
        self.sparsity_weight = sparsity_weight
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim),
            nn.Sigmoid()  # Outputs in the range [0, 1]
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Preparing Data
# Option to use different scalers: MinMaxScaler, StandardScaler, RobustScaler
def prepare_data(data, scaler_type='MinMaxScaler'):
    if scaler_type == 'StandardScaler':
        scaler = StandardScaler()
    elif scaler_type == 'RobustScaler':
        scaler = RobustScaler()
    else:
        scaler = MinMaxScaler()
    
    data_scaled = scaler.fit_transform(data)
    return torch.tensor(data_scaled, dtype=torch.float32), scaler

# Apply PCA for Dimensionality Reduction
# This can help focus the autoencoder on the most relevant features
def apply_pca(data, n_components=0.95):
    pca = PCA(n_components=n_components)
    data_pca = pca.fit_transform(data)
    return data_pca, pca

# Early Stopping Functionality
def early_stopping(patience):
    class EarlyStopping:
        def __init__(self, patience=patience):
            self.patience = patience
            self.counter = 0
            self.best_loss = float('inf')
            self.early_stop = False
        
        def __call__(self, loss):
            if loss < self.best_loss:
                self.best_loss = loss
                self.counter = 0
            else:
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
    return EarlyStopping()

# Training the Sparse Autoencoder with DataFrame Output
def perform_autoencoder(data, epochs=100, batch_size=32, learning_rate=0.001, patience=10, scaler_type='MinMaxScaler', use_pca=False, sparsity_weight=1e-5):
    # Preprocess Data
    if use_pca:
        data, pca = apply_pca(data)

    data_tensor, scaler = prepare_data(data, scaler_type=scaler_type)
    train_data, val_data = train_test_split(data_tensor, test_size=0.2, random_state=42)

    train_loader = DataLoader(TensorDataset(train_data), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_data), batch_size=batch_size, shuffle=False)

    model = SparseAutoencoder(input_dim=data.shape[1], sparsity_weight=sparsity_weight)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    criterion = nn.SmoothL1Loss()  # Changed to Smooth L1 Loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    stopper = early_stopping(patience=patience)

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for batch in train_loader:
            batch = batch[0].to(device)
            optimizer.zero_grad()
            encoded, outputs = model(batch)
            
            # Reconstruction loss
            loss = criterion(outputs, batch)
            
            # Sparsity penalty (L1 regularization on encoded activations)
            l1_penalty = torch.mean(torch.abs(encoded))
            loss += sparsity_weight * l1_penalty
            
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * batch.size(0)

        train_loss /= len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch[0].to(device)
                _, outputs = model(batch)
                loss = criterion(outputs, batch)
                val_loss += loss.item() * batch.size(0)

        val_loss /= len(val_loader.dataset)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

        # Early stopping
        stopper(val_loss)
        if stopper.early_stop:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    # Convert tensor back to DataFrame for consistency
    _, data_decoded = model(data_tensor.to(device))
    data_decoded = data_decoded.cpu().detach().numpy()
    df_encoded = pd.DataFrame(data_decoded, columns=[f'feature_{i}' for i in range(data_decoded.shape[1])])
    return df_encoded

# Usage example
# Assuming 'data' is your input dataset as a NumPy array or pandas DataFrame.
# df_encoded = train_sparse_autoencoder(data, epochs=100, batch_size=32, learning_rate=0.001, patience=10, scaler_type='StandardScaler', use_pca=True, sparsity_weight=1e-5)


In [6]:
def feature_engineering(df):
    season_cols = [col for col in df.columns if 'Season' in col]
    df = df.drop(season_cols, axis=1)
     
    # Combine all grip strength
    df['FGC-FGC_GS'] = df['FGC-FGC_GSD_Zone'] + df['FGC-FGC_GSND_Zone']
    
    # Combine all sit and reach
    df['FGC-FGC_SR'] = df['FGC-FGC_SRL_Zone'] + df['FGC-FGC_SRR_Zone']
    
    # Create a fitness score by adding the zone fitness data
    df['fitness_score'] = df['FGC-FGC_GS'] + df['FGC-FGC_SR'] + df['FGC-FGC_CU_Zone'] + df['FGC-FGC_PU_Zone'] + df['FGC-FGC_TL_Zone']
    
    # Combine PAQ_A-PAQ_A_Total and PAQ_C-PAQ_C_Total into one column
    df['PAQ_Total'] = df['PAQ_A-PAQ_A_Total'].combine_first(df['PAQ_C-PAQ_C_Total'])
    
    # Features from other notebook
    df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']
    df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']
    df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']
    df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']
    df['FFMI_BFP'] = df['BIA-BIA_FFMI'] / df['BIA-BIA_Fat']
    df['FMI_BFP'] = df['BIA-BIA_FMI'] / df['BIA-BIA_Fat']
    df['LST_TBW'] = df['BIA-BIA_LST'] / df['BIA-BIA_TBW']
    df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR']
    df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE']
    df['BMR_Weight'] = df['BIA-BIA_BMR'] / df['Physical-Weight']
    df['DEE_Weight'] = df['BIA-BIA_DEE'] / df['Physical-Weight']
    df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']
    df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI']
    df['Hydration_Status'] = df['BIA-BIA_TBW'] / df['Physical-Weight']
    df['ICW_TBW'] = df['BIA-BIA_ICW'] / df['BIA-BIA_TBW']
    df['Age_Weight'] = df['Basic_Demos-Age'] * df['Physical-Weight']
    df['Sex_BMI'] = df['Basic_Demos-Sex'] * df['Physical-BMI']
    df['Sex_HeartRate'] = df['Basic_Demos-Sex'] * df['Physical-HeartRate']
    df['Age_WaistCirc'] = df['Basic_Demos-Age'] * df['Physical-Waist_Circumference']
    df['BMI_FitnessMaxStage'] = df['Physical-BMI'] * df['Fitness_Endurance-Max_Stage']
    df['Weight_GripStrengthDominant'] = df['Physical-Weight'] * df['FGC-FGC_GSD']
    df['Weight_GripStrengthNonDominant'] = df['Physical-Weight'] * df['FGC-FGC_GSND']
    df['HeartRate_FitnessTime'] = df['Physical-HeartRate'] * (df['Fitness_Endurance-Time_Mins'] + df['Fitness_Endurance-Time_Sec'])
    df['Age_PushUp'] = df['Basic_Demos-Age'] * df['FGC-FGC_PU']
    df['FFMI_Age'] = df['BIA-BIA_FFMI'] * df['Basic_Demos-Age']
    df['InternetUse_SleepDisturbance'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['SDS-SDS_Total_Raw']
    df['CGAS_BMI'] = df['CGAS-CGAS_Score'] * df['Physical-BMI']
    df['CGAS_FitnessMaxStage'] = df['CGAS-CGAS_Score'] * df['Fitness_Endurance-Max_Stage']
    
    return df

In [7]:
# Encode time series data
train_ts_encoded = perform_autoencoder(df_train, epochs=100, batch_size=32, learning_rate=0.001, patience=10, use_pca=False, scaler_type='MinMaxScaler', sparsity_weight=1e-5)
test_ts_encoded = perform_autoencoder(df_test, epochs=100, batch_size=32, learning_rate=0.001, patience=10, use_pca=False, scaler_type='MinMaxScaler', sparsity_weight=1e-5)

train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

# Merge data
train = pd.merge(train, train_ts_encoded, how="left", on='id')
test = pd.merge(test, test_ts_encoded, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

Epoch 1, Train Loss: 0.0466, Validation Loss: 0.0373
Epoch 2, Train Loss: 0.0249, Validation Loss: 0.0190
Epoch 3, Train Loss: 0.0186, Validation Loss: 0.0183
Epoch 4, Train Loss: 0.0182, Validation Loss: 0.0181
Epoch 5, Train Loss: 0.0181, Validation Loss: 0.0180
Epoch 6, Train Loss: 0.0180, Validation Loss: 0.0181
Epoch 7, Train Loss: 0.0178, Validation Loss: 0.0178
Epoch 8, Train Loss: 0.0175, Validation Loss: 0.0172
Epoch 9, Train Loss: 0.0163, Validation Loss: 0.0150
Epoch 10, Train Loss: 0.0138, Validation Loss: 0.0128
Epoch 11, Train Loss: 0.0125, Validation Loss: 0.0118
Epoch 12, Train Loss: 0.0112, Validation Loss: 0.0099
Epoch 13, Train Loss: 0.0092, Validation Loss: 0.0078
Epoch 14, Train Loss: 0.0076, Validation Loss: 0.0067
Epoch 15, Train Loss: 0.0068, Validation Loss: 0.0061
Epoch 16, Train Loss: 0.0065, Validation Loss: 0.0059
Epoch 17, Train Loss: 0.0063, Validation Loss: 0.0059
Epoch 18, Train Loss: 0.0062, Validation Loss: 0.0058
Epoch 19, Train Loss: 0.0062, Validat

In [8]:
# Skew removal for some columns
skewed_columns = [
    'BIA-BIA_BMC', 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_Fat',
    'BIA-BIA_FFM', 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 
    'BIA-BIA_TBW', 'CGAS-CGAS_Score', 'feature_23', 'feature_35', 'feature_38', 'feature_40', 'feature_47',
    'feature_54', 'feature_66', 'feature_78', 'feature_80', 'feature_88', 'feature_90'
]
lambda_params = {}

# Define the box-cox function to remove skew
def box_cox_transform(df, column, lambda_param=None):
    # Create a copy of the dataframe
    df_copy = df.copy()
    
    # Drop NaN values for the specific column
    df_copy = df_copy.dropna(subset=[column])
    
    # Ensure all values are positive
    min_value = df_copy[column].min()
    if min_value <= 0:
        df_copy[column] = df_copy[column] - min_value + 1  # Add 1 to ensure all values are positive
    
    # Perform Box-Cox transformation
    if lambda_param is None:
        df_copy[f'{column}_boxcox'], lambda_param = stats.boxcox(df_copy[column])
        print(f"Transforming column: {column}")
        print(f"Optimal lambda for Box-Cox transformation: {lambda_param}")
    else:
        df_copy[f'{column}_boxcox'] = stats.boxcox(df_copy[column], lmbda=lambda_param)
        print(f"Applying transformation to column: {column} with lambda: {lambda_param}")
    
    print(f"Number of rows before transformation: {len(df)}")
    print(f"Number of rows after removing NaN values: {len(df_copy)}")
    
    return df_copy, lambda_param

# Apply Box-Cox transformation to train data and store lambda values
for column in skewed_columns:
    transformed_train_data, lambda_params[column] = box_cox_transform(train, column)
    # Update only the new transformed column in the original dataframe
    train[f'{column}_boxcox'] = transformed_train_data[f'{column}_boxcox']

# Apply the same transformation to test data using stored lambda values
for column in skewed_columns:
    transformed_test_data, _ = box_cox_transform(test, column, lambda_param=lambda_params[column])
    # Update only the new transformed column in the original dataframe
    test[f'{column}_boxcox'] = transformed_test_data[f'{column}_boxcox']

# Function to handle infinite values
def replace_inf_with_max(df):
    for column in df.columns:
        if df[column].dtype == 'float64':
            max_value = df[column][~np.isinf(df[column])].max()
            df[column] = df[column].replace([np.inf, -np.inf], max_value)
    return df

# Replace infinite values with the maximum non-infinite value in each column
train_data = replace_inf_with_max(train)
test_data = replace_inf_with_max(test)

Transforming column: BIA-BIA_BMC
Optimal lambda for Box-Cox transformation: -0.26544288750244394
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_BMR
Optimal lambda for Box-Cox transformation: -2.024016452566404
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_DEE
Optimal lambda for Box-Cox transformation: -0.9862196352522961
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_ECW
Optimal lambda for Box-Cox transformation: -0.11312798067663181
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_Fat
Optimal lambda for Box-Cox transformation: 27.718481796974547
Number of rows before transformation: 3960
Number of rows after removing NaN values: 1991
Transforming column: BIA-BIA_FFM
Optimal lambda for Box-Cox transforma

In [9]:
imputer = KNNImputer(n_neighbors=5)
numeric_cols = train.select_dtypes(include=['float64', 'int64']).columns
imputed_data = imputer.fit_transform(train[numeric_cols])
train_imputed = pd.DataFrame(imputed_data, columns=numeric_cols)
train_imputed['sii'] = train_imputed['sii'].round().astype(int)
for col in train.columns:
    if col not in numeric_cols:
        train_imputed[col] = train[col]
        
train = train_imputed

train = feature_engineering(train)
train = train.dropna(thresh=10, axis=0)
test = feature_engineering(test)

In [10]:
train.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_C-PAQ_C_Total,PCIAT-PCIAT_01,PCIAT-PCIAT_02,PCIAT-PCIAT_03,PCIAT-PCIAT_04,PCIAT-PCIAT_05,PCIAT-PCIAT_06,PCIAT-PCIAT_07,PCIAT-PCIAT_08,PCIAT-PCIAT_09,PCIAT-PCIAT_10,PCIAT-PCIAT_11,PCIAT-PCIAT_12,PCIAT-PCIAT_13,PCIAT-PCIAT_14,PCIAT-PCIAT_15,PCIAT-PCIAT_16,PCIAT-PCIAT_17,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,sii,BIA-BIA_BMC_boxcox,BIA-BIA_BMR_boxcox,BIA-BIA_DEE_boxcox,BIA-BIA_ECW_boxcox,BIA-BIA_Fat_boxcox,BIA-BIA_FFM_boxcox,BIA-BIA_FFMI_boxcox,BIA-BIA_FMI_boxcox,BIA-BIA_ICW_boxcox,BIA-BIA_LDM_boxcox,BIA-BIA_LST_boxcox,BIA-BIA_TBW_boxcox,CGAS-CGAS_Score_boxcox,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,feature_79,feature_80,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_88,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox,FGC-FGC_GS,FGC-FGC_SR,fitness_score,PAQ_Total,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,Age_Weight,Sex_BMI,Sex_HeartRate,Age_WaistCirc,BMI_FitnessMaxStage,Weight_GripStrengthDominant,Weight_GripStrengthNonDominant,HeartRate_FitnessTime,Age_PushUp,FFMI_Age,InternetUse_SleepDisturbance,CGAS_BMI,CGAS_FitnessMaxStage
0,5.0,0.0,51.0,16.877316,46.0,50.8,28.0,68.6,83.8,111.6,4.2,4.8,39.2,0.0,0.0,18.58,2.6,29.06,1.6,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,3.1638,2.5882,5.0,4.0,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,55.0,42.0,56.6,3.0,2,1.795346,0.494067,1.013221,1.877806,6.776256e+107,1.210694,0.378516,13083250000000.0,0.836481,1.443716,1.461227,1.296982,2.887303,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.2,0.0,5.2,3.1638,84.386578,15.0,50.631947,0.545865,1.499679,0.332267,1.190475,8591.822097,13746.94484,18.35626,29.370079,0.424811,6.383063,0.643522,0.747453,254.0,0.0,0.0,140.0,70.884726,1476.248,943.864,3687.2,0.0,69.0885,126.0,860.7431,214.2
1,9.0,0.0,67.2,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,4.2,4.8,39.2,3.0,0.0,22.98,1.4,21.46,1.8,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,2.6258,2.34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,64.0,0.0,0,1.791257,0.494067,1.013225,1.624546,6.664675e+107,1.211268,0.378436,12361790000000.0,0.832549,1.635205,1.462669,1.279664,3.02337,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.2,2.0,5.2,2.6258,126.320313,0.0,0.0,0.282883,3.229888,0.305154,1.458119,3719.320478,5950.914352,20.362087,32.579348,0.321056,12.718037,0.588157,0.777492,414.0,0.0,0.0,198.0,58.949479,987.16,1057.08,3080.0,45.0,115.4286,0.0,943.191667,282.24
2,10.0,1.0,71.0,16.648696,56.5,75.6,28.0,65.0,94.0,117.0,5.0,7.0,33.0,20.0,1.0,10.2,1.0,14.7,2.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.17,5.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,2.0,1.0,1.0,28.0,38.0,54.0,2.0,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,3.054867,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,2.0,7.0,2.028,166.486961,20.0,33.297392,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,15.807553,29.209656,0.576907,9.417063,0.666929,0.655173,756.0,16.648696,94.0,280.0,83.24348,1111.32,771.12,3760.0,70.0,138.7402,76.0,1182.05742,355.0
3,9.0,0.0,71.0,18.292347,56.0,81.6,26.8,60.0,97.0,117.0,6.0,9.0,37.0,18.0,1.0,18.94,1.8,28.88,2.2,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,2.3802,2.451,4.0,2.0,4.0,0.0,5.0,1.0,0.0,3.0,2.0,2.0,3.0,0.0,3.0,0.0,0.0,3.0,4.0,3.0,4.0,1.0,44.0,31.0,45.0,0.0,1,1.845723,0.494067,1.013388,2.360996,6.985483e+107,1.229847,0.378534,13552920000000.0,0.841114,1.671884,1.5003,1.322946,3.054867,0.076294,0.082646,0.085511,0.078161,0.080426,0.079455,0.075918,0.075467,0.082875,0.079193,0.078242,0.078936,0.447151,0.453177,0.449156,0.397713,0.471151,0.000728,0.176714,0.784058,0.564329,0.619829,0.74061,0.202422,0.693032,0.66668,0.542186,0.370708,0.444183,0.000735,0.332641,0.376667,0.419758,0.660591,0.30263,0.205993,0.770361,0.56733,0.993031,4e-06,0.029667,0.0001881914,0.0001563678,0.668979,0.01015315,0.014863,0.518875,0.197702,0.324712,0.301765,0.196553,0.187392,0.25768,6.520164e-10,0.014419,0.697316,0.541301,0.33607,0.68268,0.195066,0.430225,0.501904,0.416306,0.415418,0.425836,2.48608e-08,0.059744,0.758293,0.664854,0.542784,0.801198,0.196694,0.525594,0.64803,0.619398,0.461347,0.582007,4.000602e-07,0.052015,0.839808,0.591514,0.743046,0.825986,0.203474,0.171029,0.170737,0.210966,0.324708,0.997734,8.7e-05,0.124608,0.097867,0.986421,0.991002,0.864797,0.225604,-1.528569,-6.68858,-0.005905,-4.413096,-1.175718,-4.464273,-3.285241,-5.297307,-2.703727,-0.002039,-0.570888,4.0,0.0,6.0,2.3802,164.631122,0.0,0.0,1.028971,0.747651,0.224196,1.281264,21298.377749,36207.411592,13.865564,23.571569,0.472854,6.274343,0.563684,0.661008,734.4,0.0,0.0,241.2,109.754082,2356.608,1545.504,4462.0,45.0,126.666,0.0,1298.756633,426.0
4,18.0,1.0,70.6,16.570716,53.05,68.36,28.0,67.6,85.2,110.6,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,1.04,2.3418,1.0,1.0,1.2,0.4,0.8,0.6,0.0,0.0,0.4,0.2,0.6,0.0,0.4,0.2,1.0,0.6,0.4,0.4,0.4,0.2,13.6,33.8,48.4,1.8,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,3.0465,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.8,1.0,6.2,1.04,298.272883,32.4,29.827288,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,17.481729,32.303248,0.614425,9.417063,0.737563,0.655173,1230.48,16.570716,85.2,504.0,79.539435,1441.0288,1417.7864,4498.56,288.0,249.73236,60.84,1169.89253,338.88
5,13.0,1.0,50.0,22.279952,59.5,112.2,25.0,60.0,73.0,102.0,4.2,4.8,39.2,12.0,0.0,16.5,2.0,17.9,2.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,2.414,4.11,3.0,3.0,3.0,0.0,2.0,1.0,0.0,2.0,2.0,1.0,0.0,1.0,3.0,3.0,2.0,1.0,3.0,1.0,2.0,1.0,34.0,40.0,56.0,0.0,1,1.864977,0.494067,1.013409,2.828091,8.156581e+107,1.240129,0.378661,17846420000000.0,0.842524,1.738032,1.523098,1.342026,2.876979,0.498988,0.501773,0.493171,0.500375,0.499162,0.476953,0.495527,0.513456,0.511584,0.499221,0.490311,0.504044,0.531795,0.475249,0.242572,0.068737,0.236166,0.649013,0.046644,0.37371,0.311003,0.611957,0.71371,0.239376,0.411626,0.437622,0.621426,0.103925,0.5892,0.999728,0.10257,0.652663,0.727241,0.744415,0.305684,0.137408,0.831927,0.675527,0.989335,0.004577,0.042237,4.591873e-07,1.995214e-07,1.7e-05,6.822056e-07,0.003998,0.43502,0.233475,0.622418,0.419635,0.03519,0.031253,0.048372,0.03263406,0.033739,0.339239,0.155479,0.239701,0.621138,0.246743,0.541549,0.50572,0.111579,0.043092,0.166232,0.9810914,0.051511,0.269699,0.439416,0.533667,0.739028,0.232943,0.411876,0.530517,0.344469,0.053627,0.35912,0.9999931,0.032734,0.436929,0.570331,0.833242,0.812124,0.231571,0.120128,0.133278,0.14858,0.214379,0.985486,1.0,0.102537,0.103484,0.999985,0.997727,0.857934,0.241777,-1.374412,-12.996469,-0.008307,-3.877758,-1.088516,-3.531964,-3.486908,-6.770012,-3.302046,-0.007867,-0.575691,4.0,2.0,6.0,2.414,289.639376,0.0,0.0,2.251718,0.24551,0.198595,1.262516,90468.027355,135701.701175,11.862478,17.793672,0.594629,2.621003,0.562625,0.521399,1458.6,22.279952,73.0,325.0,93.575798,2008.38,1851.3,3212.0,78.0,216.9401,0.0,1113.997599,210.0
6,10.0,0.0,59.2,19.66076,55.0,84.6,26.2,123.0,83.0,163.0,4.2,4.8,39.2,9.0,1.0,22.178,2.2,23.952,2.4,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,2.3802,3.67,1.0,4.0,1.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1.0,4.0,1.0,0.0,20.0,27.0,40.0,3.0,0,1.843325,0.494067,1.013354,2.394666,7.041561e+107,1.230141,0.378572,13861210000000.0,0.841408,1.65729,1.501039,1.324696,2.940062,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.6,2.0,8.6,2.3802,196.607603,30.0,58.982281,1.085954,0.688428,0.232422,1.259274,24254.01858,38806.51514,13.426241,21.482033,0.476285,5.278294,0.558169,0.654233,846.0,0.0,0.0,262.0,82.575193,2026.3392,1876.2588,3652.0,20.0,147.0,81.0,1163.917012,248.64
7,10.0,1.0,60.8,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,4.2,4.8,39.2,0.0,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,1.936,1.27,3.0,2.6,3.0,1.0,3.8,0.8,1.4,1.2,1.6,1.4,2.2,0.2,2.0,1.8,1.4,2.2,3.4,1.4,2.4,1.0,36.4,42.4,66.8,2.0,1,1.854326,0.494067,1.013377,2.606525,6.92878e+107,1.232877,0.378501,13160320000000.0,0.83989,1.684591,1.506823,1.328996,2.966215,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,0.0,3.0,1.936,168.612865,20.0,33.722573,0.963488,0.837623,0.200275,1.265839,19172.581896,30676.066044,14.014727,22.423515,0.485536,8.840947,0.599486,0.565344,842.0,16.861286,90.0,270.0,70.817403,934.62,1060.92,3960.0,0.0,136.092,84.8,1025.166217,255.36
8,15.0,0.0,65.0,16.570716,49.4,114.8,28.0,69.8,82.4,108.2,5.6,6.6,42.4,8.8,0.2,19.62,2.4,21.2,1.8,12.2,0.8,10.9,0.8,10.4,0.8,8.9,0.8,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.436,2.3418,0.6,1.6,0.8,0.0,1.0,0.4,0.0,0.0,0.4,0.0,0.8,0.0,0.4,0.0,0.6,0.4,0.0,0.4,0.0,0.2,9.8,34.2,48.8,2.0,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,2.99102,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.2,1.6,7.6,2.436,248.560736,30.0,33.141431,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,10.409852,19.235627,0.659823,9.417063,0.439197,0.655173,1722.0,0.0,0.0,420.0,92.796008,2433.76,2252.376,4037.6,183.0,208.1103,68.4,1077.096522,364.0
9,19.0,1.0,56.4,20.889514,53.4,118.88,28.0,71.4,85.2,109.0,4.8,9.0,43.8,12.0,0.4,20.74,2.0,21.08,1.8,16.0,0.4,8.6,0.6,7.7,0.4,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.3418,1.0,1.0,0.6,0.4,0.8,0.6,0.0,0.0,0.4,0.2,0.4,0.0,0.8,0.2,0.6,0.6,0.4,0.4,0.4,0.6,13.6,38.2,54.0,1.8,0,1.835777,0.494067,1.013444,2.394988,6.956961e+107,1.232235,0.378512,13246280000000.0,0.842177,1.698815,1.506082,1.326553,2.931482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.8,1.0,6.2,2.028,396.900772,34.2,37.601126,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,10.052582,18.575454,0.610398,9.417063,0.424124,0.655173,2258.72,20.889514,85.2,532.0,100.269669,2505.9904,2465.5712,4498.56,304.0,263.60638,68.76,1178.168608,270.72


In [11]:
# Export train_data to CSV
train_output_path = os.path.join(output_folder, 'train_data_imputed.csv')
train.to_csv(train_output_path, index=False)
print(f"Imputed train data exported to: {train_output_path}")

# Export test_data to CSV
test_output_path = os.path.join(output_folder, 'test_data_imputed.csv')
test.to_csv(test_output_path, index=False)
print(f"Imputed test data exported to: {test_output_path}")

print("Data export completed.")

Imputed train data exported to: output\train_data_imputed.csv
Imputed test data exported to: output\test_data_imputed.csv
Data export completed.


In [12]:
# Feature selection
time_series_cols = [
    'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4',
    'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9',
    'feature_10', 'feature_11', 'feature_12', 'feature_13', 'feature_14',
    'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19',
    'feature_20', 'feature_21', 'feature_22', 'feature_24',
    'feature_25', 'feature_26', 'feature_27', 'feature_28', 'feature_29',
    'feature_30', 'feature_31', 'feature_32', 'feature_33', 'feature_34',
    'feature_36', 'feature_37', 'feature_39',
    'feature_41', 'feature_42', 'feature_43', 'feature_44',
    'feature_45', 'feature_46', 'feature_48', 'feature_49',
    'feature_50', 'feature_51', 'feature_52', 'feature_53', 
    'feature_55', 'feature_56', 'feature_57', 'feature_58', 'feature_59',
    'feature_60', 'feature_61', 'feature_62', 'feature_63', 'feature_64',
    'feature_65', 'feature_67', 'feature_68', 'feature_69',
    'feature_70', 'feature_71', 'feature_72', 'feature_73', 'feature_74',
    'feature_75', 'feature_76', 'feature_77', 'feature_79',
    'feature_81', 'feature_82', 'feature_83', 'feature_84',
    'feature_85', 'feature_86', 'feature_87', 'feature_89',
    'feature_91', 'feature_92', 'feature_93', 'feature_94',
    'feature_95', 'feature_23_boxcox', 'feature_35_boxcox', 'feature_38_boxcox',
    'feature_40_boxcox', 'feature_47_boxcox', 'feature_54_boxcox',
    'feature_66_boxcox', 'feature_78_boxcox', 'feature_80_boxcox',
    'feature_88_boxcox', 'feature_90_boxcox'
]

# Dropped columns 'FGC-FGC_CU', 'FGC-FGC_PU', 'FGC-FGC_SRR', 'FGC-FGC_SRR_Zone',
featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex', 'FGC-FGC_GS', 'FGC-FGC_SR',
                'CGAS-CGAS_Score', 'Physical-BMI', 'fitness_score',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 
                'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total', 'PAQ_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'sii', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW','Age_Weight','Sex_BMI','Sex_HeartRate','Age_WaistCirc',
                'BMI_FitnessMaxStage','Weight_GripStrengthDominant','Weight_GripStrengthNonDominant','HeartRate_FitnessTime',
                'Age_PushUp','FFMI_Age','InternetUse_SleepDisturbance','CGAS_BMI','CGAS_FitnessMaxStage']

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

# Dropped columns 'FGC-FGC_CU', 'FGC-FGC_PU', 'FGC-FGC_SRR', 'FGC-FGC_SRR_Zone',
featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex', 'FGC-FGC_GS', 'FGC-FGC_SR',
                'CGAS-CGAS_Score', 'Physical-BMI', 'fitness_score',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-PAQ_A_Total', 'PAQ_Total',
                'PAQ_C-PAQ_C_Total', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW','Age_Weight','Sex_BMI','Sex_HeartRate','Age_WaistCirc',
                'BMI_FitnessMaxStage','Weight_GripStrengthDominant','Weight_GripStrengthNonDominant','HeartRate_FitnessTime',
                'Age_PushUp','FFMI_Age','InternetUse_SleepDisturbance','CGAS_BMI','CGAS_FitnessMaxStage']

featuresCols += time_series_cols
test = test[featuresCols]

In [13]:
train.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,FGC-FGC_GS,FGC-FGC_SR,CGAS-CGAS_Score,Physical-BMI,fitness_score,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_Total,PAQ_C-PAQ_C_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,sii,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,Age_Weight,Sex_BMI,Sex_HeartRate,Age_WaistCirc,BMI_FitnessMaxStage,Weight_GripStrengthDominant,Weight_GripStrengthNonDominant,HeartRate_FitnessTime,Age_PushUp,FFMI_Age,InternetUse_SleepDisturbance,CGAS_BMI,CGAS_FitnessMaxStage,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_36,feature_37,feature_39,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_79,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_89,feature_91,feature_92,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox
0,5.0,0.0,4.2,0.0,51.0,16.877316,5.2,46.0,50.8,28.0,68.6,83.8,111.6,4.2,4.8,39.2,0.0,18.58,2.6,29.06,1.6,0.0,7.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,3.1638,3.1638,2.5882,42.0,56.6,3.0,2,84.386578,15.0,50.631947,0.545865,1.499679,0.332267,1.190475,8591.822097,13746.94484,18.35626,29.370079,0.424811,6.383063,0.643522,0.747453,254.0,0.0,0.0,140.0,70.884726,1476.248,943.864,3687.2,0.0,69.0885,126.0,860.7431,214.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9.0,0.0,3.2,2.0,67.2,14.03559,5.2,48.0,46.0,22.0,75.0,70.0,122.0,4.2,4.8,39.2,0.0,22.98,1.4,21.46,1.8,0.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,2.6258,2.6258,2.34,46.0,64.0,0.0,0,126.320313,0.0,0.0,0.282883,3.229888,0.305154,1.458119,3719.320478,5950.914352,20.362087,32.579348,0.321056,12.718037,0.588157,0.777492,414.0,0.0,0.0,198.0,58.949479,987.16,1057.08,3080.0,45.0,115.4286,0.0,943.191667,282.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10.0,1.0,3.0,2.0,71.0,16.648696,7.0,56.5,75.6,28.0,65.0,94.0,117.0,5.0,7.0,33.0,1.0,10.2,1.0,14.7,2.0,1.0,10.0,1.0,5.0,0.0,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.028,2.17,38.0,54.0,2.0,0,166.486961,20.0,33.297392,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,15.807553,29.209656,0.576907,9.417063,0.666929,0.655173,756.0,16.648696,94.0,280.0,83.24348,1111.32,771.12,3760.0,70.0,138.7402,76.0,1182.05742,355.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9.0,0.0,4.0,0.0,71.0,18.292347,6.0,56.0,81.6,26.8,60.0,97.0,117.0,6.0,9.0,37.0,1.0,18.94,1.8,28.88,2.2,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,2.3802,2.3802,2.451,31.0,45.0,0.0,1,164.631122,0.0,0.0,1.028971,0.747651,0.224196,1.281264,21298.377749,36207.411592,13.865564,23.571569,0.472854,6.274343,0.563684,0.661008,734.4,0.0,0.0,241.2,109.754082,2356.608,1545.504,4462.0,45.0,126.666,0.0,1298.756633,426.0,0.076294,0.082646,0.085511,0.078161,0.080426,0.079455,0.075918,0.075467,0.082875,0.079193,0.078242,0.078936,0.447151,0.453177,0.449156,0.397713,0.471151,0.000728,0.176714,0.784058,0.564329,0.619829,0.74061,0.693032,0.66668,0.542186,0.370708,0.444183,0.000735,0.332641,0.376667,0.419758,0.660591,0.30263,0.770361,0.56733,4e-06,0.0001881914,0.0001563678,0.668979,0.01015315,0.014863,0.518875,0.324712,0.301765,0.196553,0.187392,0.25768,6.520164e-10,0.697316,0.541301,0.33607,0.68268,0.195066,0.430225,0.501904,0.416306,0.415418,0.425836,2.48608e-08,0.758293,0.664854,0.542784,0.801198,0.196694,0.525594,0.64803,0.619398,0.461347,0.582007,4.000602e-07,0.839808,0.743046,0.825986,0.203474,0.171029,0.170737,0.210966,0.324708,8.7e-05,0.097867,0.986421,0.991002,0.864797,0.225604,-1.528569,-6.68858,-0.005905,-4.413096,-1.175718,-4.464273,-3.285241,-5.297307,-2.703727,-0.002039,-0.570888
4,18.0,1.0,3.8,1.0,70.6,16.570716,6.2,53.05,68.36,28.0,67.6,85.2,110.6,4.8,9.0,43.8,0.4,20.74,2.0,21.08,1.8,0.4,8.6,0.6,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,1.04,1.04,2.3418,33.8,48.4,1.8,0,298.272883,32.4,29.827288,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,17.481729,32.303248,0.614425,9.417063,0.737563,0.655173,1230.48,16.570716,85.2,504.0,79.539435,1441.0288,1417.7864,4498.56,288.0,249.73236,60.84,1169.89253,338.88,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,13.0,1.0,4.0,2.0,50.0,22.279952,6.0,59.5,112.2,25.0,60.0,73.0,102.0,4.2,4.8,39.2,0.0,16.5,2.0,17.9,2.0,0.0,10.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,2.414,2.414,4.11,40.0,56.0,0.0,1,289.639376,0.0,0.0,2.251718,0.24551,0.198595,1.262516,90468.027355,135701.701175,11.862478,17.793672,0.594629,2.621003,0.562625,0.521399,1458.6,22.279952,73.0,325.0,93.575798,2008.38,1851.3,3212.0,78.0,216.9401,0.0,1113.997599,210.0,0.498988,0.501773,0.493171,0.500375,0.499162,0.476953,0.495527,0.513456,0.511584,0.499221,0.490311,0.504044,0.531795,0.475249,0.242572,0.068737,0.236166,0.649013,0.046644,0.37371,0.311003,0.611957,0.71371,0.411626,0.437622,0.621426,0.103925,0.5892,0.999728,0.10257,0.652663,0.727241,0.744415,0.305684,0.831927,0.675527,0.004577,4.591873e-07,1.995214e-07,1.7e-05,6.822056e-07,0.003998,0.43502,0.622418,0.419635,0.03519,0.031253,0.048372,0.03263406,0.339239,0.155479,0.239701,0.621138,0.246743,0.541549,0.50572,0.111579,0.043092,0.166232,0.9810914,0.269699,0.439416,0.533667,0.739028,0.232943,0.411876,0.530517,0.344469,0.053627,0.35912,0.9999931,0.436929,0.833242,0.812124,0.231571,0.120128,0.133278,0.14858,0.214379,1.0,0.103484,0.999985,0.997727,0.857934,0.241777,-1.374412,-12.996469,-0.008307,-3.877758,-1.088516,-3.531964,-3.486908,-6.770012,-3.302046,-0.007867,-0.575691
6,10.0,0.0,4.6,2.0,59.2,19.66076,8.6,55.0,84.6,26.2,123.0,83.0,163.0,4.2,4.8,39.2,1.0,22.178,2.2,23.952,2.4,0.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,2.3802,2.3802,3.67,27.0,40.0,3.0,0,196.607603,30.0,58.982281,1.085954,0.688428,0.232422,1.259274,24254.01858,38806.51514,13.426241,21.482033,0.476285,5.278294,0.558169,0.654233,846.0,0.0,0.0,262.0,82.575193,2026.3392,1876.2588,3652.0,20.0,147.0,81.0,1163.917012,248.64,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,10.0,1.0,3.0,0.0,60.8,16.861286,3.0,59.25,84.2,27.0,71.0,90.0,116.0,4.2,4.8,39.2,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,1.936,1.936,1.27,42.4,66.8,2.0,1,168.612865,20.0,33.722573,0.963488,0.837623,0.200275,1.265839,19172.581896,30676.066044,14.014727,22.423515,0.485536,8.840947,0.599486,0.565344,842.0,16.861286,90.0,270.0,70.817403,934.62,1060.92,3960.0,0.0,136.092,84.8,1025.166217,255.36,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,15.0,0.0,4.2,1.6,65.0,16.570716,7.6,49.4,114.8,28.0,69.8,82.4,108.2,5.6,6.6,42.4,0.2,19.62,2.4,21.2,1.8,0.8,10.9,0.8,8.9,0.8,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.436,2.436,2.3418,34.2,48.8,2.0,0,248.560736,30.0,33.141431,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,10.409852,19.235627,0.659823,9.417063,0.439197,0.655173,1722.0,0.0,0.0,420.0,92.796008,2433.76,2252.376,4037.6,183.0,208.1103,68.4,1077.096522,364.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,19.0,1.0,3.8,1.0,56.4,20.889514,6.2,53.4,118.88,28.0,71.4,85.2,109.0,4.8,9.0,43.8,0.4,20.74,2.0,21.08,1.8,0.4,8.6,0.6,9.1,0.6,3.2,3.66177,17.33532,1195.051,2208.25,17.386128,69.5518,13.87402,3.461298,17.488204,1.2,33.03372,19.13194,65.89,32.59526,50.41984,2.028,2.028,2.3418,38.2,54.0,1.8,0,396.900772,34.2,37.601126,1.008819,0.793336,0.197922,1.306827,20899.295678,38618.326483,10.052582,18.575454,0.610398,9.417063,0.424124,0.655173,2258.72,20.889514,85.2,532.0,100.269669,2505.9904,2465.5712,4498.56,304.0,263.60638,68.76,1178.168608,270.72,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [14]:
test.head(10)

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,FGC-FGC_GS,FGC-FGC_SR,CGAS-CGAS_Score,Physical-BMI,fitness_score,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-PAQ_A_Total,PAQ_Total,PAQ_C-PAQ_C_Total,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,Age_Weight,Sex_BMI,Sex_HeartRate,Age_WaistCirc,BMI_FitnessMaxStage,Weight_GripStrengthDominant,Weight_GripStrengthNonDominant,HeartRate_FitnessTime,Age_PushUp,FFMI_Age,InternetUse_SleepDisturbance,CGAS_BMI,CGAS_FitnessMaxStage,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_36,feature_37,feature_39,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_79,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_89,feature_91,feature_92,feature_93,feature_94,feature_95,feature_23_boxcox,feature_35_boxcox,feature_38_boxcox,feature_40_boxcox,feature_47_boxcox,feature_54_boxcox,feature_66_boxcox,feature_78_boxcox,feature_80_boxcox,feature_88_boxcox,feature_90_boxcox
0,5,0,,0.0,51.0,16.877316,,46.0,50.8,,,,,,,,0.0,,,,,0.0,7.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,,,,,,3.0,84.386578,15.0,50.631947,0.545865,1.499679,0.332267,1.190475,8591.822097,13746.94484,18.35626,29.370079,0.424811,6.383063,0.643522,0.747453,254.0,0.0,,,,,,,0.0,69.0885,,860.7431,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9,0,,2.0,,14.03559,,48.0,46.0,22.0,75.0,70.0,122.0,,,,0.0,,,,,0.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,,2.34,2.34,46.0,64.0,0.0,126.320313,0.0,0.0,0.282883,3.229888,0.305154,1.458119,3719.320478,5950.914352,20.362087,32.579348,0.321056,12.718037,0.588157,0.777492,414.0,0.0,0.0,198.0,,,,,45.0,115.4286,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10,1,3.0,2.0,71.0,16.648696,7.0,56.5,75.6,,65.0,94.0,117.0,5.0,7.0,33.0,1.0,10.2,1.0,14.7,2.0,1.0,10.0,1.0,5.0,0.0,,,,,,,,,,,,,,,,,,2.17,2.17,38.0,54.0,2.0,166.486961,20.0,33.297392,,,,,,,,,,,,,756.0,16.648696,94.0,,83.24348,1111.32,771.12,3760.0,70.0,,76.0,1182.05742,355.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9,0,,0.0,71.0,18.292347,,56.0,81.6,,60.0,97.0,117.0,6.0,9.0,37.0,1.0,,,,,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,,2.451,2.451,31.0,45.0,0.0,164.631122,0.0,0.0,1.028971,0.747651,0.224196,1.281264,21298.377749,36207.411592,13.865564,23.571569,0.472854,6.274343,0.563684,0.661008,734.4,0.0,0.0,,109.754082,,,4462.0,45.0,126.666,0.0,1298.756633,426.0,0.490778,0.491021,0.494113,0.532272,0.497062,0.489691,0.468973,0.48168,0.471231,0.474183,0.491019,0.487002,0.488217,0.520204,0.527702,0.478469,0.509951,0.484998,0.550736,0.507353,0.529389,0.496941,0.516659,0.489394,0.489819,0.510388,0.535545,0.526769,0.513194,0.510469,0.513211,0.510738,0.472778,0.462827,0.509708,0.495283,0.511119,0.465998,0.481436,0.548763,0.565729,0.48334,0.507716,0.489086,0.501649,0.505843,0.496499,0.494646,0.494707,0.483596,0.504429,0.510089,0.511535,0.516464,0.544181,0.537934,0.544716,0.541013,0.501995,0.510423,0.506721,0.512859,0.51672,0.484372,0.459451,0.507592,0.473024,0.50495,0.520822,0.478435,0.484933,0.552391,0.485229,0.473438,0.477719,0.517651,0.474581,0.546096,0.491181,0.480662,0.528365,0.49961,0.50147,0.507855,0.512343,-0.708484,-1.106829,-0.020072,-0.750258,-0.640591,-0.664319,-0.715453,-0.721776,-4.927016,-0.010441,-0.378009
4,18,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.04,1.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,13,1,4.0,2.0,50.0,22.279952,6.0,59.5,112.2,,60.0,73.0,102.0,,,,0.0,16.5,2.0,17.9,2.0,0.0,10.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,,4.11,4.11,40.0,56.0,0.0,289.639376,0.0,0.0,2.251718,0.24551,0.198595,1.262516,90468.027355,135701.701175,11.862478,17.793672,0.594629,2.621003,0.562625,0.521399,1458.6,22.279952,73.0,,,2008.38,1851.3,,78.0,216.9401,0.0,1113.997599,,0.49341,0.489165,0.495411,0.529758,0.498526,0.490178,0.474526,0.4811,0.474511,0.477904,0.485019,0.488703,0.493366,0.522167,0.527094,0.475458,0.509957,0.484674,0.550004,0.506003,0.528718,0.497769,0.516556,0.491424,0.486269,0.507756,0.533675,0.524466,0.517415,0.503188,0.513868,0.514289,0.477924,0.464673,0.50837,0.496184,0.511983,0.474632,0.481173,0.545154,0.558833,0.482745,0.506244,0.491004,0.498396,0.499324,0.500562,0.490363,0.499223,0.479826,0.503441,0.502705,0.511208,0.518419,0.541117,0.533745,0.542207,0.537457,0.499736,0.507877,0.502148,0.512218,0.51584,0.48128,0.462018,0.505761,0.475945,0.503465,0.524413,0.482842,0.487014,0.552298,0.486822,0.474198,0.484581,0.515862,0.475214,0.543467,0.492015,0.483793,0.530582,0.499994,0.5026,0.512728,0.51521,-0.70937,-1.111208,-0.020072,-0.745132,-0.63353,-0.665221,-0.709065,-0.728119,-5.133924,-0.010441,-0.379297
6,10,0,,2.0,,19.66076,,55.0,84.6,,123.0,83.0,163.0,,,,1.0,,,,,0.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,,3.67,3.67,27.0,40.0,3.0,196.607603,30.0,58.982281,1.085954,0.688428,0.232422,1.259274,24254.01858,38806.51514,13.426241,21.482033,0.476285,5.278294,0.558169,0.654233,846.0,0.0,0.0,,,,,,20.0,147.0,81.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,10,1,3.0,0.0,,16.861286,3.0,59.25,84.2,27.0,71.0,90.0,116.0,,,,0.0,12.6,2.0,11.1,1.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,,1.27,1.27,,,2.0,168.612865,20.0,33.722573,0.963488,0.837623,0.200275,1.265839,19172.581896,30676.066044,14.014727,22.423515,0.485536,8.840947,0.599486,0.565344,842.0,16.861286,90.0,270.0,,934.62,1060.92,,0.0,136.092,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,15,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,30.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,19,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [15]:
# Replace inf
if np.any(np.isinf(train)):
    train = train.replace([np.inf, -np.inf], np.nan)

In [16]:
def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

In [17]:
SEED = 42
n_splits = 5

Params = {
    'n_estimators': 656,
    'learning_rate': 0.015782890450877014,
    'num_leaves': 1066,
    'max_depth': 8,
    'min_child_samples': 100,
    'colsample_bytree': 0.940857616810904,
    'bagging_fraction': 0.9991196168746357,
    'bagging_freq': 6,
    'lambda_l1': 6.332269679324449e-06,
    'lambda_l2': 2.1839610820602853e-06
}


XGB_Params = {
    'max_depth': 11,
    'learning_rate': 0.06582943935263044,
    'n_estimators': 446,
    'min_child_weight': 8,
    'subsample': 0.9993341826573182,
    'colsample_bytree': 0.888783613042681,
    'reg_alpha': 0.001075694456715294,
    'reg_lambda': 0.0012448708989684465
}


CatBoost_Params = {
    'iterations': 490,
    'learning_rate': 0.09077165664505828,
    'depth': 5,
    'l2_leaf_reg': 2.452302163739088,
    'bootstrap_type': "Bernoulli",
    'random_strength': 0.21138971422950448,
    'subsample': 0.612028714191788
}

# Create model instances
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])

In [18]:
Submission1 = TrainML(voting_model, test)

# Save submission
Submission1.to_csv('submission.csv', index=False)

Training Folds: 100%|██████████| 5/5 [02:19<00:00, 27.94s/it]

Mean Train QWK --> 0.9283
Mean Validation QWK ---> 0.4322
----> || Optimized QWK SCORE :: [36m[1m 0.483[0m





In [19]:
Submission1

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,1
3,00115b9f,1
4,0016bb22,2
5,001f3379,1
6,0038ba98,1
7,0068a485,1
8,0069fbed,2
9,0083e397,2


In [None]:
# Submission #2
featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df
        
train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train)
    mappingTe = create_mapping(col, test)
    
    train[col] = train[col].replace(mapping).astype(int)
    test[col] = test[col].replace(mappingTe).astype(int)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

# Model parameters for LightGBM
Params = {
    'learning_rate': 0.046,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  # Increased from 6.59
    'lambda_l2': 0.01  # Increased from 2.68e-06
}


# XGBoost parameters
XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  # Increased from 0.1
    'reg_lambda': 5,  # Increased from 1
    'random_state': SEED
}


CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 200,
    'random_seed': SEED,
    'cat_features': cat_c,
    'verbose': 0,
    'l2_leaf_reg': 10  # Increase this value
}

# Create model instances
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

# Combine models using Voting Regressor
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
])

# Train the ensemble model
Submission2 = TrainML(voting_model, test)

# Save submission
#Submission2.to_csv('submission.csv', index=False)
Submission2

In [None]:
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii']

cat_c = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

time_series_cols = train_ts.columns.tolist()
time_series_cols.remove("id")

train = pd.merge(train, train_ts, how="left", on='id')
test = pd.merge(test, test_ts, how="left", on='id')

train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

featuresCols += time_series_cols

train = train[featuresCols]
train = train.dropna(subset='sii')

def update(df):
    global cat_c
    for c in cat_c: 
        df[c] = df[c].fillna('Missing')
        df[c] = df[c].astype('category')
    return df

train = update(train)
test = update(test)

def create_mapping(column, dataset):
    unique_values = dataset[column].unique()
    return {value: idx for idx, value in enumerate(unique_values)}

for col in cat_c:
    mapping = create_mapping(col, train)
    mappingTe = create_mapping(col, test)
    
    train[col] = train[col].replace(mapping).astype(int)
    test[col] = test[col].replace(mappingTe).astype(int)

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

def TrainML(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tp_rounded = threshold_Rounder(tpm, KappaOPtimizer.x)

    return tp_rounded

imputer = SimpleImputer(strategy='median')

ensemble = VotingRegressor(estimators=[
    ('lgb', Pipeline(steps=[('imputer', imputer), ('regressor', LGBMRegressor(random_state=SEED))])),
    ('xgb', Pipeline(steps=[('imputer', imputer), ('regressor', XGBRegressor(random_state=SEED))])),
    ('cat', Pipeline(steps=[('imputer', imputer), ('regressor', CatBoostRegressor(random_state=SEED, silent=True))])),
    ('rf', Pipeline(steps=[('imputer', imputer), ('regressor', RandomForestRegressor(random_state=SEED))])),
    ('gb', Pipeline(steps=[('imputer', imputer), ('regressor', GradientBoostingRegressor(random_state=SEED))]))
])

Submission3 = TrainML(ensemble, test)

In [None]:
Submission3 = pd.DataFrame({
    'id': sample['id'],
    'sii': Submission3
})

Submission3

In [None]:
sub1 = Submission1
sub2 = Submission2
sub3 = Submission3

sub1 = sub1.sort_values(by='id').reset_index(drop=True)
sub2 = sub2.sort_values(by='id').reset_index(drop=True)
sub3 = sub3.sort_values(by='id').reset_index(drop=True)

combined = pd.DataFrame({
    'id': sub1['id'],
    'sii_1': sub1['sii'],
    'sii_2': sub2['sii'],
    'sii_3': sub3['sii']
})

def majority_vote(row):
    return row.mode()[0]

combined['final_sii'] = combined[['sii_1', 'sii_2', 'sii_3']].apply(majority_vote, axis=1)

final_submission = combined[['id', 'final_sii']].rename(columns={'final_sii': 'sii'})

final_submission.to_csv('submission.csv', index=False)

print("Majority voting completed and saved to 'Final_Submission.csv'")

In [None]:
final_submission