### In this notebook, we will run a very simple baseline using only FTTransformer

In [1]:
import numpy as np
import pandas as pd

#### Drop any samplers which only missed any values in PCIAT test

In [2]:
train_data = pd.read_csv("/kaggle/input/child-mind-institute-problematic-internet-use/train.csv")
test_data = pd.read_csv("/kaggle/input/child-mind-institute-problematic-internet-use/test.csv")

In [3]:
columns_not_in_test = ['PCIAT-PCIAT_01', 'PCIAT-PCIAT_02', 'PCIAT-PCIAT_03', 'PCIAT-PCIAT_04', 'PCIAT-PCIAT_05', 'PCIAT-PCIAT_06', 'PCIAT-PCIAT_07', 'PCIAT-PCIAT_08', 'PCIAT-PCIAT_09', 'PCIAT-PCIAT_10', 'PCIAT-PCIAT_11', 'PCIAT-PCIAT_12', 'PCIAT-PCIAT_13', 'PCIAT-PCIAT_14', 'PCIAT-PCIAT_15', 'PCIAT-PCIAT_16', 'PCIAT-PCIAT_17', 'PCIAT-PCIAT_18', 'PCIAT-PCIAT_19', 'PCIAT-PCIAT_20', 'PCIAT-PCIAT_Total', 'PCIAT-Season', 'sii']
train_data = train_data.dropna(subset=columns_not_in_test)

#### So, we got a quire reliable labels here, the next step would be create X and y

In [4]:
label_related_features = ['PCIAT-PCIAT_01', 'PCIAT-PCIAT_02', 'PCIAT-PCIAT_03', 'PCIAT-PCIAT_04', 'PCIAT-PCIAT_05', 'PCIAT-PCIAT_06', 'PCIAT-PCIAT_07', 'PCIAT-PCIAT_08', 'PCIAT-PCIAT_09', 'PCIAT-PCIAT_10', 'PCIAT-PCIAT_11', 'PCIAT-PCIAT_12', 'PCIAT-PCIAT_13', 'PCIAT-PCIAT_14', 'PCIAT-PCIAT_15', 'PCIAT-PCIAT_16', 'PCIAT-PCIAT_17', 'PCIAT-PCIAT_18', 'PCIAT-PCIAT_19', 'PCIAT-PCIAT_20', 'PCIAT-PCIAT_Total', 'PCIAT-Season', 'sii']
label = ['PCIAT-PCIAT_Total']
X = train_data.drop(label_related_features, axis = 1)
y = train_data[label]

#### Encode data

In [5]:
from sklearn.preprocessing import OneHotEncoder


def encode_seasonal_data(train_data, test_data, features_to_encode):
    encoded_train = train_data.copy()
    encoded_test = test_data.copy()
    
    for feature in features_to_encode:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
        train_encoded = encoder.fit_transform(encoded_train[[feature]])
        test_encoded = encoder.transform(encoded_test[[feature]])
        column_names = [f"{feature}_{cat}" for cat in encoder.categories_[0]]
        train_encoded_df = pd.DataFrame(train_encoded,
                                      columns=column_names,
                                      index=encoded_train.index)
        test_encoded_df = pd.DataFrame(test_encoded,
                                     columns=column_names,
                                     index=encoded_test.index)
        encoded_train = pd.concat([encoded_train.drop(columns=[feature]), 
                                 train_encoded_df], axis=1)
        encoded_test = pd.concat([encoded_test.drop(columns=[feature]), 
                                test_encoded_df], axis=1)
    
    return encoded_train, encoded_test

In [6]:
features_to_encode = X.select_dtypes(include=['object']).columns.to_list()
features_to_encode = features_to_encode[1:]
print(features_to_encode)

['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']


In [7]:
X_encoded, test_encoded = encode_seasonal_data(X, test_data, features_to_encode)
new_X = X_encoded.drop(['id'], axis = 1)
new_test = test_encoded.drop(['id'], axis = 1)



## In this first notebook, i just fill data with mean value

In [8]:
new_X =  new_X.fillna(new_X.mean())
new_test =  new_test.fillna(new_test.mean())
new_X.isna().sum()

Basic_Demos-Age               0
Basic_Demos-Sex               0
CGAS-CGAS_Score               0
Physical-BMI                  0
Physical-Height               0
                             ..
PreInt_EduHx-Season_Fall      0
PreInt_EduHx-Season_Spring    0
PreInt_EduHx-Season_Summer    0
PreInt_EduHx-Season_Winter    0
PreInt_EduHx-Season_nan       0
Length: 97, dtype: int64

### Training

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_val, y_train, y_val = train_test_split(new_X, y, test_size=0.2, random_state=42)
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_val_scaled = scaler.transform(X_val)

In [10]:
def get_info_for_ftt(df):
    # this function returns 2 things
    # first, categories = (10, 5, 6, 5, 8),      
    # tuple containing the number of unique values within each category
    # second, num_continuous = 10, # number of continuous values 
    # both of them are parameters of FTTransformer
    number_of_cat = 0
    cat_ranges = []
    all_features = df.columns.tolist()
    cat_idx = []
    for i, feature in enumerate(all_features):
        if (df[feature].nunique() <= 2):
            number_of_cat = number_of_cat + 1
            cat_ranges.append(df[feature].nunique())
            cat_idx.append(i)
    
    num_continuous = df.shape[-1] - number_of_cat
    return cat_ranges, num_continuous, cat_idx


In [11]:
cat_ranges, num_continuous, cat_idx = get_info_for_ftt(new_X)

In [12]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, features, labels, cat_idx):
        self.features = features
        self.labels = labels
        self.cat_idx = cat_idx
        
    def __len__(self):
        return len(self.labels)
        
    def divide_cat_num(self, row_data):
        mask = np.zeros(len(row_data), dtype=bool)
        mask[self.cat_idx] = True
        cat_elements = row_data[mask]
        remaining_elements = row_data[~mask]
        return cat_elements, remaining_elements

    def __getitem__(self, idx):
        row_value = self.features[idx]
        tensor_row_value = torch.tensor(row_value)
        cat_values, num_values = self.divide_cat_num(row_value)
        cat_values = torch.tensor(cat_values, dtype=torch.int32)
        num_values = torch.tensor(num_values, dtype=torch.float64)
        label = self.labels.iloc[idx].values
        tensor_label = torch.tensor(label, dtype=torch.float64) 
        return cat_values, num_values, tensor_label

In [13]:
from torch.utils.data import DataLoader

In [14]:
train_dataset = MyDataset(X_train.values, y_train, cat_idx)
val_dataset = MyDataset(X_val.values, y_val, cat_idx)


In [15]:
trainloader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
valloader = DataLoader(val_dataset, batch_size = 64, shuffle = True)

In [16]:
def train_model(train_loader, val_loader, model, criterion, optimizer, num_epochs, save_path='model_checkpoints'):
    import os
    import torch
    
    # Create save directory if it doesn't exist
    os.makedirs(save_path, exist_ok=True)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for x_cat, x_num, y in train_loader:
            # Move all inputs to device
            x_cat = x_cat.to(device) if x_cat is not None else None
            x_num = x_num.to(device)
            y = y.to(device)
            
            optimizer.zero_grad()
            output = model(x_cat, x_num)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for x_cat, x_num, y in val_loader:
                x_cat = x_cat.to(device) if x_cat is not None else None
                x_num = x_num.to(device)
                y = y.to(device)
                
                output = model(x_cat, x_num)
                val_loss += criterion(output, y).item()
        
        avg_train_loss = train_loss/len(train_loader)
        avg_val_loss = val_loss/len(val_loader)
        print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
        
        # Save the best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': avg_val_loss,
                'best_val_loss': best_val_loss
            }
            torch.save(checkpoint, os.path.join(save_path, f'best_model.pth'))
            print(f'Saved best model with validation loss: {best_val_loss:.4f}')

In [17]:
!pip install --no-deps /kaggle/input/tab-transformer-pytorch/tab_transformer_pytorch-0.3.0-py3-none-any.whl

Processing /kaggle/input/tab-transformer-pytorch/tab_transformer_pytorch-0.3.0-py3-none-any.whl
Installing collected packages: tab-transformer-pytorch
Successfully installed tab-transformer-pytorch-0.3.0


In [18]:
!pip install /kaggle/input/fttransformer/einops-0.8.0-py3-none-any.whl

Processing /kaggle/input/fttransformer/einops-0.8.0-py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.8.0


In [19]:
from tab_transformer_pytorch import FTTransformer
import torch.nn as nn
import torch.optim as optim

In [20]:
cat_ranges, num_continuous, cat_idx = get_info_for_ftt(new_X)

In [21]:

from tab_transformer_pytorch import FTTransformer
model = FTTransformer(
    categories = tuple(cat_ranges), 
    num_continuous = num_continuous,                
    dim = 97,                           
    dim_out = 1,                        
    depth = 6,                          
    heads = 8,                          
    attn_dropout = 0.1,                 
    ff_dropout = 0.1,                  
)
model = model.to(torch.float64)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(
    train_loader=trainloader,
    val_loader=valloader,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=100
)


Epoch 1, Train Loss: 997.3377, Val Loss: 904.4184
Saved best model with validation loss: 904.4184
Epoch 2, Train Loss: 911.2197, Val Loss: 799.8868
Saved best model with validation loss: 799.8868
Epoch 3, Train Loss: 833.5609, Val Loss: 789.8424
Saved best model with validation loss: 789.8424
Epoch 4, Train Loss: 756.4792, Val Loss: 668.5363
Saved best model with validation loss: 668.5363
Epoch 5, Train Loss: 688.8826, Val Loss: 599.5619
Saved best model with validation loss: 599.5619
Epoch 6, Train Loss: 609.5086, Val Loss: 552.5035
Saved best model with validation loss: 552.5035
Epoch 7, Train Loss: 552.1508, Val Loss: 519.3555
Saved best model with validation loss: 519.3555
Epoch 8, Train Loss: 511.7710, Val Loss: 483.9323
Saved best model with validation loss: 483.9323
Epoch 9, Train Loss: 474.1945, Val Loss: 427.2409
Saved best model with validation loss: 427.2409
Epoch 10, Train Loss: 452.5389, Val Loss: 420.6824
Saved best model with validation loss: 420.6824
Epoch 11, Train Los

In [23]:
def load_model(model, checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model

In [24]:
checkpoint_path = "/kaggle/working/model_checkpoints/best_model.pth"
# checkpoint_path = "/kaggle/input/best_v1/pytorch/default/1/best_model_val_loss_291.7448.pth"
model = load_model(model, checkpoint_path)


  checkpoint = torch.load(checkpoint_path)


In [25]:
def get_prediction(new_test, cat_idx, model, batch_size=32, track_memory=False):
    def divide_cat_num(data, cat_idx):
        mask = np.zeros(data.shape[1], dtype=bool)
        mask[cat_idx] = True
        cat_elements = data[:, mask]
        remaining_elements = data[:, ~mask]
        return cat_elements, remaining_elements
    
    device = 'cuda'
    new_test_values = new_test.values
    num_samples = len(new_test_values)
    predictions = []
    
    # Process data in batches
    for i in range(0, num_samples, batch_size):
        if track_memory:
            print(f'GPU Memory before batch {i}: {torch.cuda.memory_allocated()/1024**2:.2f} MB')
            
        batch_data = new_test_values[i:i + batch_size]
        cat, num = divide_cat_num(batch_data, cat_idx)
        
        cat = torch.tensor(cat, dtype=torch.int32).to(device)
        num = torch.tensor(num, dtype=torch.float64).to(device)
        
        with torch.no_grad():
            output = model(cat, num)
            output = output.squeeze(1)
            predictions.extend(output.cpu().numpy())
        
        # Clear GPU memory
        del cat
        del num
        del output
        torch.cuda.empty_cache()
        
        if track_memory:
            print(f'GPU Memory after batch {i}: {torch.cuda.memory_allocated()/1024**2:.2f} MB')
    
    return predictions

In [26]:
prediction = get_prediction(new_test, cat_idx, model)
prediction

[20.711689665266185,
 17.760663056539578,
 31.324933788495876,
 26.840195697473813,
 32.45676096566512,
 29.43825926032099,
 25.54967719428012,
 22.254143156819318,
 32.353447844602535,
 34.233625910491774,
 26.733812861074103,
 28.582423369171426,
 32.87683360699251,
 33.387248035395125,
 30.47322572232639,
 21.10235209443912,
 13.507134381590427,
 22.66908485968623,
 22.83752607740625,
 28.915843134810753]

In [27]:
test_id = test_data['id']

In [28]:
def handle_prediction(predictions, test_id):
    sii = []
    for i in range(len(predictions)):
        predict = predictions[i]
        if (predict >=  0 and predict <= 30):
            sii.append(0)
        elif(predict < 50):
            sii.append(1)
        elif(predict < 80):
            sii.append(2)
        else:
            sii.append(3)
    sii = pd.DataFrame(sii)
    submission = pd.concat([test_id, sii], axis = 1)
    submission = submission.rename(columns={0: 'sii'})
    return submission
submission = handle_prediction(prediction, test_id)

In [29]:
submission.to_csv("submission.csv", index=False)