In [3]:
import sys
import time
import math
import itertools

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from scipy.special import logit
from scipy.stats import norm

import tensorflow as tf
from keras import layers, models, datasets

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR, LambdaLR
import torch.autograd.profiler as profiler

from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
# Toy check: build every ordered pair (x[b, i], x[b, j]) of the features in each row.
import torch

batch_size, features = 2, 3
x = torch.arange(batch_size * features, dtype=torch.float).view(batch_size, features)

# x_i[b, i, j] == x[b, i] and x_j[b, i, j] == x[b, j]; expand() only creates views.
x_i = x[:, :, None].expand(batch_size, features, features)
x_j = x[:, None, :].expand(batch_size, features, features)

# stack() materialises the pairs, so the result can be reshaped freely afterwards.
pairs_4d = torch.stack((x_i, x_j), dim=-1)
pairs_2d = pairs_4d.reshape(batch_size, features * features, 2)

print(pairs_4d.shape)
print("Input shape:", x.shape)
print(x)
print("Pairs shape:", pairs_2d.shape)
print("Pairs: ", pairs_2d)

torch.Size([2, 3, 3, 2])
Input shape: torch.Size([2, 3])
tensor([[0., 1., 2.],
        [3., 4., 5.]])
Pairs shape: torch.Size([2, 9, 2])
Pairs:  tensor([[[0., 0.],
         [0., 1.],
         [0., 2.],
         [1., 0.],
         [1., 1.],
         [1., 2.],
         [2., 0.],
         [2., 1.],
         [2., 2.]],

        [[3., 3.],
         [3., 4.],
         [3., 5.],
         [4., 3.],
         [4., 4.],
         [4., 5.],
         [5., 3.],
         [5., 4.],
         [5., 5.]]])


In [5]:
# Index pairs (i, j) with i <= j over four items: 4 * 5 / 2 = 10 rows.
idx = torch.combinations(torch.arange(0, 4), 2, with_replacement=True)
print(idx.shape)
print(idx)

torch.Size([10, 2])
tensor([[0, 0],
        [0, 1],
        [0, 2],
        [0, 3],
        [1, 1],
        [1, 2],
        [1, 3],
        [2, 2],
        [2, 3],
        [3, 3]])


In [6]:
# Toy check: a (B, M, 2) tensor of pairs survives a flatten / unflatten round trip.
import torch

B, N = 2, 4
x = torch.randn(B, N)

M = (N * (N + 1)) // 2  # number of unordered pairs with repetition
pairs = torch.rand(B, M, 2)
print(pairs)

# (B, M, 2) -> (B, 2 * M): the two members of a pair stay next to each other.
pairs_flat = pairs.reshape(B, -1)
print(pairs_flat.shape)
print(pairs_flat)

# (B, 2 * M) -> (B, M, 2): restores the original layout.
pairs_unflat = pairs_flat.reshape(B, M, 2)
print(pairs_unflat)

tensor([[[0.6041, 0.7659],
         [0.8416, 0.5386],
         [0.5562, 0.2203],
         [0.6311, 0.7825],
         [0.0263, 0.0084],
         [0.9168, 0.9597],
         [0.0654, 0.9866],
         [0.4473, 0.4602],
         [0.0402, 0.8973],
         [0.8459, 0.4366]],

        [[0.7446, 0.5607],
         [0.1047, 0.4008],
         [0.6539, 0.0341],
         [0.4687, 0.4814],
         [0.3425, 0.3255],
         [0.8493, 0.1935],
         [0.5175, 0.1076],
         [0.0671, 0.8789],
         [0.1894, 0.5983],
         [0.6753, 0.4562]]])
torch.Size([2, 20])
tensor([[0.6041, 0.7659, 0.8416, 0.5386, 0.5562, 0.2203, 0.6311, 0.7825, 0.0263,
         0.0084, 0.9168, 0.9597, 0.0654, 0.9866, 0.4473, 0.4602, 0.0402, 0.8973,
         0.8459, 0.4366],
        [0.7446, 0.5607, 0.1047, 0.4008, 0.6539, 0.0341, 0.4687, 0.4814, 0.3425,
         0.3255, 0.8493, 0.1935, 0.5175, 0.1076, 0.0671, 0.8789, 0.1894, 0.5983,
         0.6753, 0.4562]])
tensor([[[0.6041, 0.7659],
         [0.8416, 0.5386],
     

In [7]:
def calculate_metrics(model, data_tensor, labels_tensor, batch_size=1024, num_features=22):
    """Return ``(accuracy, weighted F1)`` of ``model`` on the given tensors.

    The data is processed in consecutive slices of ``batch_size`` rows without
    gradient tracking. Each slice is viewed as ``(-1, num_features)`` before it is
    passed to the model, and the predicted class is the index of the largest
    model output along dimension 1.

    Note: the model is switched to evaluation mode and left in that mode.
    """
    model.eval()
    predicted, expected = [], []
    total_rows = len(data_tensor)

    with torch.no_grad():
        for lo in range(0, total_rows, batch_size):
            hi = min(lo + batch_size, total_rows)
            batch = data_tensor[lo:hi].view(-1, num_features)
            targets = labels_tensor[lo:hi]

            scores = model(batch)
            batch_preds = torch.max(scores, 1)[1]  # index of the winning class
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    return (
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='weighted'),
    )

In [8]:
class CustomDataLoader:
    """Holds the train/validation tensors on the module-level ``device``.

    Attributes set by the constructor:
        train_data_tensor / train_labels_tensor: shuffled training split.
        val_data_tensor / val_labels_tensor: validation split, or ``None`` for
            both when ``validation_size`` is not positive.

    Features are stored as float tensors. Labels are stored as long tensors when
    ``classification`` is true and as float tensors otherwise.
    """

    def __init__(self, features, labels, validation_size=0.2, random_state=42, classification=True):
        def as_feature_tensor(values):
            return torch.tensor(values).float().to(device)

        def as_label_tensor(values):
            raw = torch.tensor(values)
            typed = raw.long() if classification else raw.float()
            return typed.to(device)

        if validation_size > 0.0:
            # Stratify only for classification so both splits keep the class balance.
            train_data, val_data, train_labels, val_labels = train_test_split(
                features,
                labels,
                test_size=validation_size,
                stratify=labels if classification else None,
                random_state=random_state,
            )
            self.val_data_tensor = as_feature_tensor(val_data)
            self.val_labels_tensor = as_label_tensor(val_labels)
        else:
            train_data, train_labels = features, labels
            self.val_data_tensor = None
            self.val_labels_tensor = None

        train_x = as_feature_tensor(train_data)
        train_y = as_label_tensor(train_labels)

        # Seeds torch's global RNG so the shuffle below is reproducible.
        torch.manual_seed(random_state)
        shuffled_order = torch.randperm(len(train_x))

        self.train_data_tensor = train_x[shuffled_order]
        self.train_labels_tensor = train_y[shuffled_order]

In [33]:
def evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs, scheduler, batch_size=1024, num_features=22, early_stopping_patience=10):
    """Train ``model`` and print loss / accuracy / F1 every 10 epochs.

    Changes compared with the previous revision of this cell:
      * the optimisation step (``loss.backward()``, ``optimizer.step()``,
        ``scheduler.step()``) is active again; it had been commented out, so the
        loop computed losses but never updated the model;
      * a loader without a validation split (``val_data_tensor is None``) no
        longer crashes; validation metrics and early stopping are skipped;
      * the early-stopping increment is tied to the evaluation interval instead
        of repeating the literal 10;
      * the second loss line is labelled as the evaluation-mode loss (it used to
        carry the same label as the running training loss).

    Args:
        model: network to train; called as ``model(inputs)``.
        custom_train_loader: object exposing ``train_data_tensor``,
            ``train_labels_tensor``, ``val_data_tensor`` and ``val_labels_tensor``.
        criterion: callable as ``criterion(outputs, labels, model)`` and
            providing ``regular_loss(outputs, labels)``.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: maximum number of passes over the training data.
        scheduler: learning-rate scheduler stepped once per batch, or ``None``.
        batch_size: rows per batch.
        num_features: each batch is viewed as ``(-1, num_features)``.
        early_stopping_patience: training stops once the validation loss has not
            improved for this many epochs (counted in evaluation intervals).

    Returns:
        None. Progress is reported through ``print``.
    """
    eval_interval = 10  # epochs between two evaluation reports

    train_data = custom_train_loader.train_data_tensor
    train_labels = custom_train_loader.train_labels_tensor
    val_data = custom_train_loader.val_data_tensor
    val_labels = custom_train_loader.val_labels_tensor
    has_validation = val_data is not None and len(val_data) > 0

    def iter_batches(data, targets):
        """Yield consecutive (inputs, labels) slices of at most ``batch_size`` rows."""
        total_rows = len(data)
        for start_idx in range(0, total_rows, batch_size):
            end_idx = min(start_idx + batch_size, total_rows)
            yield data[start_idx:end_idx].view(-1, num_features), targets[start_idx:end_idx]

    def mean_regular_loss(data, targets):
        """Average of ``criterion.regular_loss`` over ``data``, without gradients."""
        weighted_sum = 0.0
        with torch.no_grad():
            for inputs, labels in iter_batches(data, targets):
                weighted_sum += criterion.regular_loss(model(inputs), labels).item() * len(labels)
        return weighted_sum / len(data)

    best_val_loss = float('inf')
    best_epoch = 0
    patience_counter = 0
    stopped_early = False

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in iter_batches(train_data, train_labels):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels, model)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                # Stepped per batch, after the optimizer, as in the original loop.
                scheduler.step()
            running_loss += loss.item() * len(labels)

        if epoch % eval_interval != 0:
            continue

        model.eval()

        # Running loss was accumulated while the weights were still changing;
        # the eval-mode loss is measured on the weights as they are now.
        avg_train_loss = running_loss / len(train_data)
        train_reg_loss = mean_regular_loss(train_data, train_labels)
        train_accuracy, train_f1 = calculate_metrics(model, train_data, train_labels, batch_size, num_features)

        if has_validation:
            avg_val_loss = mean_regular_loss(val_data, val_labels)
            val_accuracy, val_f1 = calculate_metrics(model, val_data, val_labels, batch_size, num_features)

            print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')
            print(f'Epoch {epoch + 1}, Eval-mode Training Loss: {train_reg_loss}, Validation Loss: {avg_val_loss}')
            print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')
            print(f'Validation Accuracy: {val_accuracy}, Validation F1 Score: {val_f1}')
        else:
            print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}')
            print(f'Epoch {epoch + 1}, Eval-mode Training Loss: {train_reg_loss}')
            print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')

        for param_group in optimizer.param_groups:
            print("Learning Rate:", param_group['lr'])

        print()

        if not has_validation:
            continue

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_epoch = epoch + 1
            patience_counter = 0
        else:
            # One evaluation without improvement stands for `eval_interval` epochs.
            patience_counter += eval_interval
            if patience_counter >= early_stopping_patience:
                print(f'Early stopping triggered after {epoch + 1} epochs.')
                print(f'Best Validation Loss: {best_val_loss} from Epoch {best_epoch}')
                stopped_early = True
                break

    if not stopped_early:
        if has_validation:
            print(f'Best Validation Loss after {num_epochs} epochs: {best_val_loss} from Epoch {best_epoch}')
        else:
            print(f'Finished {num_epochs} epochs (no validation data).')

In [10]:
# Load both CSV files and tag every row with the file it came from
# (source == 0: playground train.csv, source == 1: credit_risk_dataset.csv).
data_dl = (
    pd.read_csv('/kaggle/input/playground-series-s4e10/train.csv')
    .drop(columns=["id"])
    .assign(source=0)
)
data_og = pd.read_csv('/kaggle/input/loan-approval-prediction/credit_risk_dataset.csv').assign(source=1)

# Medians come from the credit_risk_dataset file only; a later cell uses them
# to fill missing values in the combined frame.
median_emp_length = data_og['person_emp_length'].median()
median_int_rate = data_og['loan_int_rate'].median()

data = pd.concat([data_dl, data_og], ignore_index=True)

In [11]:
# Flag missing values BEFORE imputing them, then fill with the medians computed
# in the loading cell.
imputation_values = {
    'person_emp_length': median_emp_length,
    'loan_int_rate': median_int_rate,
}
for col, fill_value in imputation_values.items():
    flag_col = f'{col}_missing'
    # Re-run guard: once the NaNs are filled, recomputing the flag would silently
    # overwrite it with all zeros, so it is only created the first time.
    if flag_col not in data.columns:
        data[flag_col] = data[col].isna().astype(int)
    data[col] = data[col].fillna(fill_value)

# Categorical columns are one-hot encoded (first level dropped) rather than
# mapped to hand-picked ordinal codes.
X = data.drop(["loan_status"], axis=1)
X = pd.get_dummies(X, drop_first=True)
y = data["loan_status"]

column_to_log = [
    'person_age',
    'person_income',
]

column_to_sqrt = [
    'person_emp_length',
    'loan_percent_income',
]

for col in column_to_log:
    if (X[col] <= 0).any():
        print(f"Column '{col}' contains non-positive values. Adding 1 to avoid log of non-positive numbers.")
        X[col] = np.log(X[col] + 1)
    else:
        X[col] = np.log(X[col])

for col in column_to_sqrt:
    if (X[col] < 0).any():
        # The message now matches the code: negatives are clipped to 0, not set to NaN.
        print(f"Column '{col}' contains negative values. Clipping negative values to 0 before applying sqrt.")
        X[col] = np.sqrt(X[col].clip(lower=0))
    else:
        X[col] = np.sqrt(X[col])

print(data.isnull().sum())
print(X.columns)
print(X.shape, y.shape)
print(X.columns.get_loc('source'))

person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
loan_status                   0
source                        0
person_emp_length_missing     0
loan_int_rate_missing         0
dtype: int64
Index(['person_age', 'person_income', 'person_emp_length', 'loan_amnt',
       'loan_int_rate', 'loan_percent_income', 'cb_person_cred_hist_length',
       'source', 'person_emp_length_missing', 'loan_int_rate_missing',
       'person_home_ownership_OTHER', 'person_home_ownership_OWN',
       'person_home_ownership_RENT', 'loan_intent_EDUCATION',
       'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL',
       'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_B',
       'loan_grade_C', 'loa

In [12]:
# Standardise every feature column; the fitted scaler is kept for later reuse.
x_scaler = StandardScaler().fit(X)
x_scaled = x_scaler.transform(X)

# Encode the target as consecutive integer class ids.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

print(x_scaled.shape)

(91226, 25)


In [13]:
# NOTE(review): this cell repeats the previous one, except that it reuses the
# already-fitted `x_scaler` (transform only) and re-creates the label encoder.
# With an unchanged `X` and `y` it should reproduce the same `x_scaled` and
# `y_encoded`, so it looks like a leftover — TODO confirm and remove.
x_scaled = x_scaler.transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(x_scaled.shape)

(91226, 25)


In [14]:
# Per-column summary of the scaled feature matrix (one row per feature).
feature_means = np.mean(x_scaled, axis=0)
feature_variances = np.var(x_scaled, axis=0)
feature_mins = np.min(x_scaled, axis=0)
feature_maxs = np.max(x_scaled, axis=0)

feature_stats_scaled_full = pd.DataFrame(
    dict(
        Mean=feature_means,
        Variance=feature_variances,
        Min=feature_mins,
        Max=feature_maxs,
    )
)

print("Mean, Variance, Min, and Max of Scaled Features:")
print(feature_stats_scaled_full)

Mean, Variance, Min, and Max of Scaled Features:
            Mean  Variance       Min        Max
0  -3.289997e-16       1.0 -1.552712   8.591255
1  -1.420680e-16       1.0 -5.315235   9.344027
2   3.987875e-17       1.0 -1.859550   8.913061
3  -1.333446e-16       1.0 -1.513249   4.385625
4   9.327889e-16       1.0 -1.759487   4.065809
5  -5.358707e-16       1.0 -3.245038   4.413749
6   6.480297e-17       1.0 -0.943500   5.989958
7  -1.944089e-16       1.0 -0.745361   1.341632
8  -4.610980e-17       1.0 -0.099539  10.046317
9   6.729539e-17       1.0 -0.188056   5.317578
10  4.673291e-18       1.0 -0.046402  21.550842
11  7.477266e-17       1.0 -0.258691   3.865621
12 -6.698384e-17       1.0 -1.031790   0.969189
13  3.489391e-17       1.0 -0.508188   1.967776
14 -7.103402e-17       1.0 -0.348605   2.868576
15  1.217003e-17       1.0 -0.478658   2.089176
16  3.115527e-19       1.0 -0.453072   2.207155
17  8.801365e-18       1.0 -0.456460   2.190774
18  2.141925e-17       1.0 -0.714835   

In [15]:
class CustomLoss(nn.Module):
    """Base criterion plus optional parameter penalties.

    ``forward`` returns::

        criterion(outputs, labels)
        + f1_lambda * sum(a**2 + b**2)   over modules exposing parameters `a` and `b`
        + f2_lambda * sum((a - b)**2)    over the same modules
        + l1_lambda * sum(|W|)           over nn.Linear weights
        + l2_lambda * sum(W**2)          over nn.Linear weights

    A penalty is only evaluated when its coefficient is non-zero, so with all
    coefficients at 0 the result equals the plain criterion. Previously the
    coefficients were accepted but had no effect, because the penalty code was
    commented out.

    Attributes:
        i: number of ``forward`` calls made so far.
        grad_magnitude: value stored by the last ``compute_gradient_magnitude``
            call; 0.0 until then (the attribute used to be missing before the
            first call).
        wa_lambda: stored for interface compatibility; no term uses it.
    """

    def __init__(self, criterion, f1_lambda, f2_lambda, l1_lambda, l2_lambda, wa_lambda):
        super(CustomLoss, self).__init__()
        self.criterion = criterion
        self.f1_lambda = f1_lambda
        self.f2_lambda = f2_lambda
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.wa_lambda = wa_lambda
        self.i = 0
        self.grad_magnitude = 0.0

    def forward(self, outputs, labels, model):
        """Return the criterion value plus every enabled penalty for ``model``.

        ``model`` may be ``None`` when all penalty coefficients are zero.
        """
        f1_loss = 0.0
        f2_loss = 0.0
        l1_loss = 0.0
        l2_loss = 0.0

        want_slope_terms = self.f1_lambda != 0 or self.f2_lambda != 0
        want_weight_terms = self.l1_lambda != 0 or self.l2_lambda != 0

        if model is not None and (want_slope_terms or want_weight_terms):
            for module in model.modules():
                if want_slope_terms:
                    # Duck-typed on purpose: any module with parameters named
                    # `a` and `b` takes part in the slope penalties.
                    slope_a = getattr(module, 'a', None)
                    slope_b = getattr(module, 'b', None)
                    if isinstance(slope_a, nn.Parameter) and isinstance(slope_b, nn.Parameter):
                        f1_loss = f1_loss + (slope_a ** 2).sum() + (slope_b ** 2).sum()
                        f2_loss = f2_loss + ((slope_a - slope_b) ** 2).sum()

                if want_weight_terms and isinstance(module, nn.Linear):
                    l1_loss = l1_loss + module.weight.abs().sum()
                    l2_loss = l2_loss + module.weight.pow(2).sum()

        total_loss = (self.criterion(outputs, labels)
                      + self.f1_lambda * f1_loss
                      + self.f2_lambda * f2_loss
                      + self.l1_lambda * l1_loss
                      + self.l2_lambda * l2_loss)
        self.i += 1

        return total_loss

    def compute_gradient_magnitude(self, model):
        """Store the sum of absolute gradient entries of ``model`` in ``grad_magnitude``.

        Parameters whose ``grad`` is ``None`` are skipped.
        """
        total_abs_sum = 0.0
        for param in model.parameters():
            if param.grad is not None:
                total_abs_sum += param.grad.abs().sum().item()
        self.grad_magnitude = total_abs_sum

    def regular_loss(self, outputs, labels):
        """Return the plain criterion value, without any penalty term."""
        return self.criterion(outputs, labels)

In [None]:
# class CustomActivation1d_2(nn.Module):
#     def __init__(self, num_features, init_identity=True):
#         super(CustomActivation1d_2, self).__init__()
#         num_control_points = 2
        
#         self.a = nn.Parameter(torch.zeros(num_features, num_control_points))
#         self.b = nn.Parameter(torch.zeros(num_features, num_control_points))

#         self.local_bias = nn.Parameter(torch.zeros(num_features, num_control_points))
#         self.global_bias = nn.Parameter(torch.zeros(1, num_features))

#         with torch.no_grad():
#             random_tensor = torch.randn(num_features) / 2
#             self.a[:, 0] = random_tensor
#             self.b[:, 0] = random_tensor

#             random_tensor = torch.randn(num_features) / 2
#             self.a[:, 1] = random_tensor
#             self.b[:, 1] = random_tensor

#             self.local_bias[:, 0] = norm.ppf(1/3)
#             self.local_bias[:, 1] = norm.ppf(2/3)

#         """
#         with torch.no_grad():
#             if init_identity:
#         """
        
#     def forward(self, x):
#         x = x.unsqueeze(-1) + self.local_bias
#         x = torch.where(x < 0, self.a * x, self.b * x)
#         x = x.sum(dim=-1) + self.global_bias            
#         return x

In [16]:
class CustomActivation1d(nn.Module):
    """Per-feature two-slope activation whose outputs are summed into one value per row.

    For input ``x`` with ``num_features`` entries in its last dimension::

        z   = x + local_bias
        out = sum(where(z < 0, a * z, b * z), dim=-1) + global_bias

    ``a``, ``b`` and ``local_bias`` are vectors of length ``num_features``;
    ``global_bias`` is a scalar. All of them start at zero.

    The constructor used to assign ``a`` and ``b`` three times. The first
    assignment hard-coded 25 features and crashed for ``num_features < 25``, and
    the second drew from the global RNG and shared one tensor between both
    parameters. Only the final, effective zero initialisation is kept.

    Args:
        num_features: size of the last input dimension.
        init_identity: accepted for interface compatibility; not used.
    """

    def __init__(self, num_features, init_identity=False):
        super(CustomActivation1d, self).__init__()
        # Registration order (a, b, local_bias, global_bias) is unchanged, so
        # state dicts and optimizer parameter order stay compatible.
        self.a = nn.Parameter(torch.zeros(num_features))
        self.b = nn.Parameter(torch.zeros(num_features))

        self.local_bias = nn.Parameter(torch.zeros(num_features))
        self.global_bias = nn.Parameter(torch.tensor(0.0))

    def forward(self, x):
        x = x + self.local_bias
        # Slope `a` applies to negative shifted inputs, slope `b` to the rest.
        x = torch.where(x < 0, self.a * x, self.b * x)
        return x.sum(dim=-1) + self.global_bias

In [24]:
class CustomActivation1d_2(nn.Module):
    """Per-feature two-slope activation that keeps the input shape.

    For input ``x`` with ``num_features`` entries in its last dimension::

        z   = x + local_bias
        out = where(z < 0, a * z, b * z) + global_bias

    All four parameters are vectors of length ``num_features``. ``a`` and ``b``
    start at 1 on even positions and 0 on odd positions; both biases start at zero.

    The constructor used to assign ``a`` and ``b`` three times. The discarded
    assignments drew from the global RNG and shared one tensor between both
    parameters. Only the final, effective initialisation is kept.

    Args:
        num_features: size of the last input dimension.
        init_identity: accepted for interface compatibility; not used.
    """

    def __init__(self, num_features, init_identity=False):
        super(CustomActivation1d_2, self).__init__()
        even_mask = torch.zeros(num_features, dtype=torch.float32)
        even_mask[::2] = 1.0

        # Separate clones so `a` and `b` never share storage.
        self.a = nn.Parameter(even_mask.clone())
        self.b = nn.Parameter(even_mask.clone())

        self.local_bias = nn.Parameter(torch.zeros(num_features))
        self.global_bias = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
        x = x + self.local_bias
        # Slope `a` applies to negative shifted inputs, slope `b` to the rest.
        x = torch.where(x < 0, self.a * x, self.b * x)
        return x + self.global_bias

In [None]:
class CustomActivation(nn.Module):
    """Two-slope activation with one slope pair and one shift per (input, output) feature pair.

    For a 2-D input ``x`` of shape ``(batch, num_features)``::

        z[b, i, j] = x[b, i] + local_bias[i, j]
        out[b, j]  = sum_i where(z < 0, a[i, j] * z, b[i, j] * z) + global_bias[j]

    ``a`` and ``b`` start as identity matrices and the biases as zeros, so a
    freshly constructed layer returns its input unchanged.
    """

    def __init__(self, num_features):
        super(CustomActivation, self).__init__()
        self.a = nn.Parameter(torch.eye(num_features))
        self.b = nn.Parameter(torch.eye(num_features))

        self.local_bias = nn.Parameter(torch.zeros(num_features, num_features))
        self.global_bias = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
        # Unpacking also enforces that the input is 2-D.
        _, width = x.shape

        # Repeat every input feature along a new trailing "output feature" axis.
        shifted = x[:, :, None].expand(-1, -1, width) + self.local_bias
        activated = torch.where(shifted < 0, self.a * shifted, self.b * shifted)

        # Collapse the input-feature axis: one value per output feature.
        return activated.sum(dim=1) + self.global_bias

In [None]:
class CustomLinear(nn.Module):
    """``nn.Linear`` wrapper with a deterministic start: zero bias and either an
    identity weight matrix (``init_identity=True``) or an all-zero one.

    Raises:
        ValueError: if ``init_identity`` is requested for a non-square layer.
    """

    def __init__(self, num_features, num_outputs, init_identity=False):
        super(CustomLinear, self).__init__()

        square = num_features == num_outputs
        if init_identity and not square:
            raise ValueError("For identity initialization, num_features must equal num_outputs.")

        self.linear = nn.Linear(num_features, num_outputs, bias=True)

        # Overwrite nn.Linear's random initialisation in place.
        with torch.no_grad():
            if init_identity:
                self.linear.weight.copy_(torch.eye(num_features, num_outputs))
            else:
                self.linear.weight.zero_()
            self.linear.bias.zero_()

    def forward(self, x):
        return self.linear(x)

In [None]:
"""
class TabularDenseNet(nn.Module):
    def __init__(self, input_size, output_size, num_control_points, num_layers, window_size):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()
        
        if num_layers % 2 == 1:
            self.layers.append(CustomLinear(input_size, input_size, init_identity=True))
            # self.layers.append(CustomActivation(input_size, window_size, num_control_points, init_identity=True))
            num_layers -= 1
            input_size *= 2
            
        for i in range(num_layers):
            if i % 2 == 0:
                self.layers.append(CustomLinear(input_size, input_size, init_identity=True))
            else:
                # self.layers.append(CustomActivation(input_size, window_size, num_control_points, init_identity=True))
                self.layers.append(CustomLinear(input_size, input_size, init_identity=True))

            input_size *= 2

        self.final = CustomLinear(input_size, output_size, init_identity=False)
        self.final_act = CustomActivation(output_size, window_size, num_control_points, init_identity=True)
        
    def forward(self, x):
        outputs = [x]

        for layer in self.layers:
            concatenated_outputs = torch.cat(outputs, dim=-1)
            outputs.append(F.relu(layer(concatenated_outputs)))

        concatenated_outputs = torch.cat(outputs, dim=-1)
        x = self.final(concatenated_outputs)
        x = self.final_act(x)
        return x
"""
print("")

In [None]:
class TabularDenseNet(nn.Module):
    """Densely connected stack of ``CustomActivation`` layers with a scalar head.

    Every layer receives the concatenation of the input and all earlier layer
    outputs, so the width doubles per layer. ``CustomActivation1d`` reduces the
    final concatenation to one value ``s`` per row and the network returns
    ``[s, -s]`` as two-class scores.

    ``output_size`` and ``num_control_points`` are accepted but not used.

    NOTE(review): a later cell defines another class with this same name and a
    three-argument constructor; whichever cell ran last is the one in scope.
    """

    def __init__(self, input_size, output_size, num_control_points, num_layers):
        super(TabularDenseNet, self).__init__()
        width = input_size
        stages = []
        for _ in range(num_layers):
            stages.append(CustomActivation(width))
            width += width  # the layer output is concatenated to its input
        self.layers = nn.ModuleList(stages)

        self.final_layer = CustomActivation1d(width)

    def forward(self, x):
        features = x

        # Appending each layer's output to a running concatenation equals
        # re-concatenating the whole list of outputs on every step.
        for layer in self.layers:
            features = torch.cat([features, layer(features)], dim=-1)

        final_out = self.final_layer(features)
        return torch.stack((final_out, -final_out), dim=-1)

    # def forward(self, x):
    #     outputs = [x]
    #     summed_total = torch.zeros(x.size(0), device=x.device)
    #     summed_total += self.bias

    #     for layer, layer_norm in zip(self.layers, self.layer_norms):
    #         concatenated_outputs = torch.cat(outputs, dim=-1)
    #         inter_features = layer_norm(layer(concatenated_outputs))
    #         summed_feature = inter_features.sum(dim=-1)

    #         outputs.append(inter_features)
    #         outputs.append(summed_feature.unsqueeze(-1))
    #         summed_total += summed_feature

    #     print(torch.cat(outputs, dim=-1).shape)
    #     return torch.stack([summed_total, -summed_total], dim=-1)

    # def forward(self, x):
    #     batch_size = x.size(0)
    #     device = x.device
    #     L = self.num_layers
    #     D = self.input_dim

    #     total_features = 6656
    #     concatenated_features = torch.zeros(batch_size, total_features, device=device)

    #     concatenated_features[:, :D] = x

    #     current_pos = D
    #     summed_total = self.bias.expand(batch_size)

    #     for i, (layer, layer_norm) in enumerate(zip(self.layers, self.layer_norms)):
    #         current_features = concatenated_features[:, :current_pos]
    #         inter_features = (layer(current_features))
    #         summed_feature = inter_features.sum(dim=-1, keepdim=True)

    #         concatenated_features[:, current_pos:current_pos + current_pos] = inter_features
    #         concatenated_features[:, current_pos + current_pos + 1] = summed_feature.squeeze(-1)

    #         current_pos += current_pos + 1
    #         summed_total = summed_total + summed_feature.squeeze(-1)

    #     return torch.stack([summed_total, -summed_total], dim=-1)

In [25]:
class TabularDenseNet(nn.Module):
    """Dense network built on pairwise feature interactions.

    Each layer takes the concatenation of the input and all earlier layer outputs
    (width ``w``), forms all ``w * w`` ordered pairs of features, passes the
    flattened pairs (``2 * w * w`` values) through a ``CustomActivation1d_2`` and
    sums the two members of every pair. That adds ``w * w`` features.
    ``CustomActivation1d`` reduces the final concatenation to one value ``s`` per
    row and the network returns ``[s, -s]`` as two-class scores.

    ``output_size`` is accepted but not used.
    """

    def __init__(self, input_size, output_size, num_layers):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()

        width = input_size
        for _ in range(num_layers):
            num_pairs = width * width
            self.layers.append(CustomActivation1d_2(num_pairs * 2))
            width += num_pairs

        self.final_layer = CustomActivation1d(width, init_identity=False)

    def forward(self, x):
        collected = [x]

        for layer in self.layers:
            x = torch.cat(collected, dim=-1)
            batch, width = x.size(0), x.size(1)

            # left[b, i, j] == x[b, i] and right[b, i, j] == x[b, j].
            left = x.unsqueeze(2).expand(batch, width, width)
            right = x.unsqueeze(1).expand(batch, width, width)

            # (batch, width, width, 2) -> (batch, width * width, 2) -> (batch, 2 * width * width)
            pairs = torch.stack([left, right], dim=-1).view(batch, width * width, 2)
            activated = layer(pairs.view(batch, -1))

            # Back to (batch, width * width, 2) and add up both members of each pair.
            pair_sums = activated.view(batch, pairs.size(1), 2).sum(dim=-1)
            collected.append(pair_sums)

        final_out = self.final_layer(torch.cat(collected, dim=-1))
        return torch.stack([final_out, -final_out], dim=-1)

In [None]:
# import torch
# import torch.nn as nn

# # 1. Create a random input tensor of shape (batch_size, num_features).
# batch_size = 2
# num_features = 4
# concatenated_outputs = torch.randn(batch_size, num_features)
# print("concatenated_outputs:", concatenated_outputs.shape)

# # 2. Generate all (i, j) combinations with i <= j.
# idx = torch.combinations(torch.arange(num_features), r=2, with_replacement=True)
# # idx has shape (M, 2), where M = num_features * (num_features + 1) // 2
# print("idx shape:", idx.shape)

# # 3. Gather the pairs. pairs has shape (B, M, 2).
# pairs = concatenated_outputs[:, idx]  
# print("pairs shape:", pairs.shape)
# print(pairs)

# # 4. Flatten from (B, M, 2) -> (B, M*2).
# pairs_flat = pairs.view(pairs.size(0), -1)  
# print("pairs_flat shape:", pairs_flat.shape)

# # 5. Reshape (unflatten) back to (B, M, 2).
# pairs_unflat = pairs_flat.view(pairs.size(0), pairs.size(1), 2)
# print("pairs_unflat shape:", pairs_unflat.shape)
# print(pairs_unflat)

# # 6. Sum across the last dimension to get (B, M).
# summed_output = pairs_unflat.sum(dim=-1)
# print("summed_output shape:", summed_output.shape)
# print("summed_output:", summed_output)


In [26]:
# 80/20 stratified split of the scaled features, moved to `device`.
custom_train_loader = CustomDataLoader(
    features=x_scaled,
    labels=y_encoded,
    validation_size=0.2,
    random_state=0,
    classification=True,
)
print(custom_train_loader.train_data_tensor.shape)

torch.Size([72980, 25])


In [27]:
# Derived from the training tensor instead of repeating its printed shape
# (72980 rows x 25 columns), so the values follow any change to the data or split.
num_features = custom_train_loader.train_data_tensor.shape[1]
num_classes = 2
num_epochs = 10000
# Full-batch training: one batch covers the whole training split.
batch_size = len(custom_train_loader.train_data_tensor) * 1

In [28]:
torch.cuda.empty_cache()

In [31]:
def custom_lr_lambda(step):
    """Learning-rate multiplier: linear warm-up for 100 steps, then slow exponential decay.

    Returns ``step / 100`` while ``step < 100`` and ``0.99999 ** (step - 100)``
    afterwards. Prints ``"here"`` once, at the step where warm-up ends.
    """
    warmup_steps = 100
    decay_base = 0.99999

    steps_past_warmup = step - warmup_steps
    if steps_past_warmup < 0:
        return step / warmup_steps
    if steps_past_warmup == 0:
        print("here")
    return decay_base ** steps_past_warmup

start_time = time.time()

# One pairwise layer on top of the raw features.
model = TabularDenseNet(num_features, num_classes, 1)
model = model.to(device)

optimizer = optim.Rprop(
    model.parameters(),
    lr=0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 1.0,
)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

# All penalty coefficients are zero: plain cross-entropy.
criterion = CustomLoss(
    criterion=nn.CrossEntropyLoss(),
    f1_lambda=0.0,
    f2_lambda=0.0,
    l1_lambda=0.0,
    l2_lambda=0.0,
    wa_lambda=0.0,
)
evaluate_model(
    model=model,
    custom_train_loader=custom_train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10000,
    scheduler=scheduler,
    batch_size=batch_size,
    num_features=num_features,
    early_stopping_patience=10000,
)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

Epoch 1, Training Loss: 0.6931450963020325, Validation Loss: 0.6928876042366028
Epoch 1, Training Loss: 0.6928873062133789, Validation Loss: 0.6928876042366028
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validation F1 Score: 0.7568499944612349
Learning Rate: 1.0000000000000002e-20

Epoch 11, Training Loss: 0.6864651441574097, Validation Loss: 0.684879720211029
Epoch 11, Training Loss: 0.6848864555358887, Validation Loss: 0.684879720211029
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validation F1 Score: 0.7568499944612349
Learning Rate: 1.1000000000000004e-19

Epoch 21, Training Loss: 0.6474131345748901, Validation Loss: 0.6386346817016602
Epoch 21, Training Loss: 0.6386884450912476, Validation Loss: 0.6386346817016602
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validatio

In [23]:
# Continue with the existing model / optimizer / scheduler state for 1000 more epochs.
evaluate_model(
    model=model,
    custom_train_loader=custom_train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=1000,
    scheduler=scheduler,
    batch_size=batch_size,
    num_features=num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.1854117512702942, Validation Loss: 0.18686872720718384
Epoch 1, Training Loss: 0.1854117512702942, Validation Loss: 0.18686872720718384
Training Accuracy: 0.9434913674979446, Training F1 Score: 0.9403759696773992
Validation Accuracy: 0.9433848514742957, Validation F1 Score: 0.9404548639246102
Learning Rate: 0.0009057332022558413

Epoch 11, Training Loss: 0.18541158735752106, Validation Loss: 0.1868654191493988
Epoch 11, Training Loss: 0.18541166186332703, Validation Loss: 0.1868654191493988
Training Accuracy: 0.9434913674979446, Training F1 Score: 0.9403734768845661
Validation Accuracy: 0.9434396580072345, Validation F1 Score: 0.9405076016647023
Learning Rate: 0.0009056426330113069

Epoch 21, Training Loss: 0.18541155755519867, Validation Loss: 0.1868695467710495
Epoch 21, Training Loss: 0.18541155755519867, Validation Loss: 0.1868695467710495
Training Accuracy: 0.9434913674979446, Training F1 Score: 0.9403759696773992
Validation Accuracy: 0.9433848514742957, 

In [None]:
def custom_lr_lambda(step):
    """Learning-rate multiplier: linear warm-up for 100 steps, then slow exponential decay.

    Returns ``step / 100`` while ``step < 100`` and ``0.99999 ** (step - 100)``
    afterwards. Prints ``"here"`` once, at the step where warm-up ends.
    """
    warmup_steps = 100
    decay_base = 0.99999

    steps_past_warmup = step - warmup_steps
    if steps_past_warmup < 0:
        return step / warmup_steps
    if steps_past_warmup == 0:
        print("here")
    return decay_base ** steps_past_warmup

start_time = time.time()
# NOTE(review): four positional arguments match the TabularDenseNet variant whose
# constructor is (input_size, output_size, num_control_points, num_layers). The
# variant defined later in the notebook takes only three, so this call raises a
# TypeError when that later definition is the one in scope — TODO confirm which
# variant this experiment is meant to use.
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
# custom_lr_lambda supplies the multiplier applied to the base learning rate.
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

# All five coefficients are zero, so only the cross-entropy term is weighted in.
criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model, Adam optimizer and the warm-up/decay LR schedule defined
# above in this cell; wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [21]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run using Rprop instead of Adam; note the model is built with three
# constructor arguments here. Wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1).to(device)

# NOTE(review): Rprop adapts per-parameter step sizes on its own; confirm that
# scheduling `lr` through LambdaLR has the intended effect with this optimizer.
optimizer = optim.Rprop(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

Epoch 1, Training Loss: 0.6931450963020325, Validation Loss: 0.6928876042366028
Epoch 1, Training Loss: 0.6928873062133789, Validation Loss: 0.6928876042366028
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validation F1 Score: 0.7568499944612349
Learning Rate: 1e-05

Epoch 11, Training Loss: 0.6864651441574097, Validation Loss: 0.684879720211029
Epoch 11, Training Loss: 0.6848864555358887, Validation Loss: 0.684879720211029
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validation F1 Score: 0.7568499944612349
Learning Rate: 0.00011

Epoch 21, Training Loss: 0.6474131345748901, Validation Loss: 0.6386346817016602
Epoch 21, Training Loss: 0.6386884450912476, Validation Loss: 0.6386346817016602
Training Accuracy: 0.8316388051520964, Training F1 Score: 0.756607740583251
Validation Accuracy: 0.8317439438781102, Validation F1 Score: 0.7568499944612349
L

In [22]:
# Global print setting: fixed-point notation with 4 decimals for every tensor
# printed from here on.
torch.set_printoptions(precision=4, sci_mode=False)

# Dump each named parameter: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

layers.0.a
Parameter containing:
tensor([    1.0002,     0.0002,     1.0138,  ...,     0.0049,     1.0025,
            0.0025], device='cuda:0', requires_grad=True)
layers.0.b
Parameter containing:
tensor([     0.9958,     -0.0042,      6.1279,  ...,     -0.0746,
             0.9997,     -0.0003], device='cuda:0', requires_grad=True)
layers.0.local_bias
Parameter containing:
tensor([ 0.0049, -0.0026,  0.1162,  ..., -0.0040, -0.0050, -0.0020],
       device='cuda:0', requires_grad=True)
layers.0.global_bias
Parameter containing:
tensor([ 0.0049,  0.0049,  0.1151,  ..., -0.0049, -0.0050, -0.0050],
       device='cuda:0', requires_grad=True)
final_layer.a
Parameter containing:
tensor([     0.0022,      0.0187,      0.0080,     -0.0144,     -0.0107,
            -0.0851,      0.0004,     -0.0183,     -0.0036,     -0.0036,
            -0.0036,     -0.0029,     -0.0156,     -0.0029,     -0.0037,
            -0.0013,     -0.0015,     -0.0029,     -0.0044,     -0.0015,
            -0.0234,     

In [23]:
# Resume training with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell); the fifth argument is the epoch budget.
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1500,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.18095000088214874, Validation Loss: 0.18156665563583374
Epoch 1, Training Loss: 0.18094487488269806, Validation Loss: 0.18156665563583374
Training Accuracy: 0.9449301178405043, Training F1 Score: 0.9419558064097058
Validation Accuracy: 0.9455223062589061, Validation F1 Score: 0.9428669898409447
Learning Rate: 0.0009910304237724693

Epoch 11, Training Loss: 0.18090243637561798, Validation Loss: 0.18150892853736877
Epoch 11, Training Loss: 0.18089835345745087, Validation Loss: 0.18150892853736877
Training Accuracy: 0.9449986297615786, Training F1 Score: 0.942021986079381
Validation Accuracy: 0.9456319193247835, Validation F1 Score: 0.9429633622234967
Learning Rate: 0.0009909313251896103

Epoch 21, Training Loss: 0.18084470927715302, Validation Loss: 0.18157550692558289
Epoch 21, Training Loss: 0.18084067106246948, Validation Loss: 0.18157550692558289
Training Accuracy: 0.9450534392984379, Training F1 Score: 0.942074938231588
Validation Accuracy: 0.94557711279184

In [24]:
# Global print setting: fixed-point, 4 decimals, and an infinite threshold so
# tensors are printed in full instead of being summarised with "...".
torch.set_printoptions(precision=4, sci_mode=False, threshold=float('inf'))

# Dump each named parameter: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

layers.0.a
Parameter containing:
tensor([     0.9883,     -0.0006,      1.0117,      0.3140,      1.0142,
            -0.4910,      1.0037,     -0.1664,      3.7976,     -2.0244,
             1.0025,      0.1069,      1.0007,      0.4767,      1.0009,
             0.0096,      1.0050,     -0.0042,      9.1516,      2.7260,
             1.0064,     -0.0047,      0.9882,     -0.0039,      0.0218,
             2.1218,      0.9867,     -0.1380,     13.0822,     -4.0477,
             0.6460,     -1.3871,      0.9937,     -0.0067,      1.0020,
            -0.0384,      0.9998,      0.0274,      1.0190,     -0.0062,
             2.7541,     -7.8198,      1.0052,     -0.2447,      1.0007,
            -0.0143,      1.0029,     -0.0055,      0.9896,     -0.1530,
             1.0074,     -0.0142,      1.0081,      0.0088,      1.0011,
            -0.0763,      1.0063,     -1.8289,      1.0088,      2.6325,
             1.0088,     -2.6246,      1.0086,     -0.0191,      1.0055,
             0.130

In [32]:
# Manual evaluation without gradient tracking: a batched forward pass over the
# training tensors, then the size-weighted validation loss and metrics.
val_loss = 0.0
with torch.no_grad():
    # Pass 1: training tensors, batch by batch. `outputs` is not used afterwards.
    n_train_rows = len(custom_train_loader.train_data_tensor)
    for start_idx in range(0, n_train_rows, batch_size):
        end_idx = min(start_idx + batch_size, n_train_rows)
        inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
        labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]

        outputs = model(inputs)

    # Pass 2: validation tensors. Each batch loss is weighted by its row count
    # so a smaller final batch does not skew the average.
    n_val_rows = len(custom_train_loader.val_data_tensor)
    for start_idx in range(0, n_val_rows, batch_size):
        end_idx = min(start_idx + batch_size, n_val_rows)
        val_inputs = custom_train_loader.val_data_tensor[start_idx:end_idx].view(-1, num_features)
        val_labels = custom_train_loader.val_labels_tensor[start_idx:end_idx]

        val_outputs = model(val_inputs)
        batch_loss = criterion.regular_loss(val_outputs, val_labels).item()
        val_loss += batch_loss * len(val_labels)

avg_val_loss = val_loss / n_val_rows
val_accuracy, val_f1 = calculate_metrics(
    model,
    custom_train_loader.val_data_tensor,
    custom_train_loader.val_labels_tensor,
    batch_size,
    num_features,
)

for metric in (avg_val_loss, val_accuracy, val_f1):
    print(metric)

0.1798071414232254
0.9463992107859257
0.943814057054567


In [35]:
# Ablation step: find the largest-|value| entry among all trainable parameters
# and overwrite it with 0, reporting which entry was changed.
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    weights = param.data
    # Flatten so one index identifies the entry whatever the parameter's shape.
    max_val, flat_index = weights.abs().view(-1).max(dim=0)
    if max_val > max_magnitude:
        max_magnitude, max_param_name, max_param_index = max_val, name, flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name != max_param_name:
            continue
        # Turn the flat index back into coordinates for this parameter's shape.
        max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
        param.data[max_param_coords] = 0
        print(f"Modified parameter: {name}")
        print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

Modified parameter: layers.0.a
Set the weight at (tensor(451, device='cuda:0'),) (value 1035.06396484375) to 0.


In [41]:
# Manual evaluation without gradient tracking: a batched forward pass over the
# training tensors, then the size-weighted validation loss and metrics.
val_loss = 0.0
with torch.no_grad():
    # Pass 1: training tensors, batch by batch. `outputs` is not used afterwards.
    n_train_rows = len(custom_train_loader.train_data_tensor)
    for start_idx in range(0, n_train_rows, batch_size):
        end_idx = min(start_idx + batch_size, n_train_rows)
        inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
        labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]

        outputs = model(inputs)

    # Pass 2: validation tensors. Each batch loss is weighted by its row count
    # so a smaller final batch does not skew the average.
    n_val_rows = len(custom_train_loader.val_data_tensor)
    for start_idx in range(0, n_val_rows, batch_size):
        end_idx = min(start_idx + batch_size, n_val_rows)
        val_inputs = custom_train_loader.val_data_tensor[start_idx:end_idx].view(-1, num_features)
        val_labels = custom_train_loader.val_labels_tensor[start_idx:end_idx]

        val_outputs = model(val_inputs)
        batch_loss = criterion.regular_loss(val_outputs, val_labels).item()
        val_loss += batch_loss * len(val_labels)

avg_val_loss = val_loss / n_val_rows
val_accuracy, val_f1 = calculate_metrics(
    model,
    custom_train_loader.val_data_tensor,
    custom_train_loader.val_labels_tensor,
    batch_size,
    num_features,
)

for metric in (avg_val_loss, val_accuracy, val_f1):
    print(metric)

0.18261361122131348
0.9459607585224159
0.943427823487248


In [34]:
# One more epoch with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell).
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.17613151669502258, Validation Loss: 0.1798071414232254
Epoch 1, Training Loss: 0.17613151669502258, Validation Loss: 0.1798071414232254
Training Accuracy: 0.9461633324198411, Training F1 Score: 0.9432768569107662
Validation Accuracy: 0.9463992107859257, Validation F1 Score: 0.943814057054567
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.1798071414232254 from Epoch 1


In [36]:
# One more epoch with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell).
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.1761421412229538, Validation Loss: 0.17988230288028717
Epoch 1, Training Loss: 0.1761421412229538, Validation Loss: 0.17988230288028717
Training Accuracy: 0.9461496300356262, Training F1 Score: 0.9432683111627964
Validation Accuracy: 0.9463992107859257, Validation F1 Score: 0.9438231813505336
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.17988230288028717 from Epoch 1


In [37]:
# Ablation step: find the largest-|value| entry among all trainable parameters
# and overwrite it with 0, reporting which entry was changed.
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    weights = param.data
    # Flatten so one index identifies the entry whatever the parameter's shape.
    max_val, flat_index = weights.abs().view(-1).max(dim=0)
    if max_val > max_magnitude:
        max_magnitude, max_param_name, max_param_index = max_val, name, flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name != max_param_name:
            continue
        # Turn the flat index back into coordinates for this parameter's shape.
        max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
        param.data[max_param_coords] = 0
        print(f"Modified parameter: {name}")
        print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

Modified parameter: layers.0.a
Set the weight at (tensor(957, device='cuda:0'),) (value 587.677490234375) to 0.


In [38]:
# One more epoch with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell).
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.17743363976478577, Validation Loss: 0.18066364526748657
Epoch 1, Training Loss: 0.17743363976478577, Validation Loss: 0.18066364526748657
Training Accuracy: 0.9455467251301727, Training F1 Score: 0.9427610227844412
Validation Accuracy: 0.9461251781212321, Validation F1 Score: 0.9436408663972071
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.18066364526748657 from Epoch 1


In [39]:
# Ablation step: find the largest-|value| entry among all trainable parameters
# and overwrite it with 0, reporting which entry was changed.
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    weights = param.data
    # Flatten so one index identifies the entry whatever the parameter's shape.
    max_val, flat_index = weights.abs().view(-1).max(dim=0)
    if max_val > max_magnitude:
        max_magnitude, max_param_name, max_param_index = max_val, name, flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name != max_param_name:
            continue
        # Turn the flat index back into coordinates for this parameter's shape.
        max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
        param.data[max_param_coords] = 0
        print(f"Modified parameter: {name}")
        print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

Modified parameter: final_layer.a
Set the weight at (tensor(198, device='cuda:0'),) (value 536.3334350585938) to 0.


In [40]:
# One more epoch with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell).
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.17743617296218872, Validation Loss: 0.18093645572662354
Epoch 1, Training Loss: 0.17743617296218872, Validation Loss: 0.18093645572662354
Training Accuracy: 0.9455467251301727, Training F1 Score: 0.9427610227844412
Validation Accuracy: 0.9460703715882933, Validation F1 Score: 0.9435789896739646
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.18093645572662354 from Epoch 1


In [41]:
# Ablation step: find the largest-|value| entry among all trainable parameters
# and overwrite it with 0, reporting which entry was changed.
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    weights = param.data
    # Flatten so one index identifies the entry whatever the parameter's shape.
    max_val, flat_index = weights.abs().view(-1).max(dim=0)
    if max_val > max_magnitude:
        max_magnitude, max_param_name, max_param_index = max_val, name, flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name != max_param_name:
            continue
        # Turn the flat index back into coordinates for this parameter's shape.
        max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
        param.data[max_param_coords] = 0
        print(f"Modified parameter: {name}")
        print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

Modified parameter: layers.0.a
Set the weight at (tensor(109, device='cuda:0'),) (value 489.6275939941406) to 0.


In [42]:
# One more epoch with the existing model, optimizer and scheduler objects
# (nothing is re-created in this cell).
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    1,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=10000,
)

Epoch 1, Training Loss: 0.1870776116847992, Validation Loss: 0.18896518647670746
Epoch 1, Training Loss: 0.1870776116847992, Validation Loss: 0.18896518647670746
Training Accuracy: 0.9441490819402576, Training F1 Score: 0.9411141995296339
Validation Accuracy: 0.9445905951989477, Validation F1 Score: 0.9417996059259088
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.18896518647670746 from Epoch 1


In [43]:
# Repeat the ablation 20 times: each round zeroes the current largest-|value|
# trainable weight and then runs one epoch through evaluate_model.
for i in range(20):
    max_magnitude = -float("inf")
    max_param_name = None
    max_param_index = None

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        weights = param.data
        # Flatten so one index identifies the entry whatever the parameter's shape.
        max_val, flat_index = weights.abs().view(-1).max(dim=0)
        if max_val > max_magnitude:
            max_magnitude, max_param_name, max_param_index = max_val, name, flat_index

    if max_param_name is not None:
        for name, param in model.named_parameters():
            if name != max_param_name:
                continue
            # Turn the flat index back into coordinates for this parameter's shape.
            max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
            param.data[max_param_coords] = 0
            print(f"Modified parameter: {name}")
            print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

    evaluate_model(
        model, custom_train_loader, criterion, optimizer,
        1, scheduler, batch_size, num_features,
        early_stopping_patience=10000,
    )

Modified parameter: layers.0.a
Set the weight at (tensor(653, device='cuda:0'),) (value 333.98663330078125) to 0.
Epoch 1, Training Loss: 0.254148006439209, Validation Loss: 0.24113979935646057
Epoch 1, Training Loss: 0.254148006439209, Validation Loss: 0.24113979935646057
Training Accuracy: 0.9422170457659633, Training F1 Score: 0.9393691431584095
Validation Accuracy: 0.9437684972048668, Validation F1 Score: 0.9411991655320133
Learning Rate: 9.057422596784384e-19

Best Validation Loss after 1 epochs: 0.24113979935646057 from Epoch 1
Modified parameter: layers.0.local_bias
Set the weight at (tensor(937, device='cuda:0'),) (value 219.10711669921875) to 0.
Epoch 1, Training Loss: 0.2540905773639679, Validation Loss: 0.24104681611061096
Epoch 1, Training Loss: 0.2540905773639679, Validation Loss: 0.24104681611061096
Training Accuracy: 0.9422307481501782, Training F1 Score: 0.9393921745858443
Validation Accuracy: 0.9437684972048668, Validation F1 Score: 0.9412180504655105
Learning Rate: 9.

In [37]:
# torch and matplotlib are already imported in the notebook's first cell;
# seaborn is not, so all three imports are kept here.
import torch
import matplotlib.pyplot as plt
import seaborn as sns

# Sanity-check the final layer's `a` parameter for inf/NaN entries, then list
# its values in ascending order.
weights = model.final_layer.a

print(weights.isinf().any())
print(weights.isnan().any())

weights_numpy = weights.detach().cpu().numpy()
sorted_weights = weights.sort().values

print(sorted_weights)

tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
tensor([    -1.3825,     -1.1956,     -1.1700,     -0.6541,     -0.5807,
            -0.5129,     -0.5006,     -0.4932,     -0.4788,     -0.4748,
            -0.4586,     -0.4538,     -0.4196,     -0.3088,     -0.3010,
            -0.2926,     -0.2878,     -0.2598,     -0.2198,     -0.2124,
            -0.2089,     -0.1993,     -0.1895,     -0.1884,     -0.1795,
            -0.1793,     -0.1752,     -0.1628,     -0.1507,     -0.1504,
            -0.1447,     -0.1440,     -0.1382,     -0.1379,     -0.1326,
            -0.1273,     -0.1234,     -0.1181,     -0.1163,     -0.1079,
            -0.1062,     -0.1048,     -0.1030,     -0.1010,     -0.0980,
            -0.0932,     -0.0875,     -0.0862,     -0.0847,     -0.0845,
            -0.0814,     -0.0781,     -0.0762,     -0.0736,     -0.0706,
            -0.0698,     -0.0607,     -0.0591,     -0.0564,     -0.0542,
            -0.0542,     -0.0528,     -0.0518,     -0.0480,   

In [42]:
# Continue training the existing model with a fresh Adam optimizer (10x smaller
# base LR) and a fresh LambdaLR schedule, so the warm-up in custom_lr_lambda
# starts again from step 0.
# Start the timer in this cell so the reported execution time covers this run
# only, instead of being measured from a start_time set by an earlier cell.
start_time = time.time()

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

Epoch 1, Training Loss: 0.17967186868190765, Validation Loss: 0.18261361122131348
Epoch 1, Training Loss: 0.17967186868190765, Validation Loss: 0.18261361122131348
Training Accuracy: 0.9452041655248014, Training F1 Score: 0.9424399521370476
Validation Accuracy: 0.9459607585224159, Validation F1 Score: 0.943427823487248
Learning Rate: 1.0000000000000002e-06

Epoch 11, Training Loss: 0.17950770258903503, Validation Loss: 0.18235042691230774
Epoch 11, Training Loss: 0.17949901521205902, Validation Loss: 0.18235042691230774
Training Accuracy: 0.9455878322828172, Training F1 Score: 0.9427204019955162
Validation Accuracy: 0.9461251781212321, Validation F1 Score: 0.9435313943392359
Learning Rate: 1.1000000000000001e-05

Epoch 21, Training Loss: 0.17938709259033203, Validation Loss: 0.1825421303510666
Epoch 21, Training Loss: 0.17938099801540375, Validation Loss: 0.1825421303510666
Training Accuracy: 0.9455330227459579, Training F1 Score: 0.9426982416433511
Validation Accuracy: 0.9460155650553

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model, Adam optimizer and the warm-up/decay LR schedule defined
# above in this cell; wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    10000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
# Long continuation run with the existing model, optimizer and scheduler
# objects; the patience value is far larger than the epoch budget.
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    100000,
    scheduler,
    batch_size,
    num_features,
    early_stopping_patience=100000000000,
)

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model, Adam optimizer and the warm-up/decay LR schedule defined
# above in this cell; wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    10000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model, Adam optimizer and the warm-up/decay LR schedule defined
# above in this cell; wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    10000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
# Dump each named parameter: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99999):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model (last constructor argument 2), Adam with a 10x smaller
# base LR, and the warm-up/decay schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    10000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model (last constructor argument 2), Adam with a 10x smaller
# base LR, and the warm-up/decay schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    10000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model, Adam optimizer and the warm-up/decay LR schedule defined
# above in this cell; wall-clock time is measured around the whole run.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Short fresh run (epoch budget 100): new model (last constructor argument 2),
# Adam with a 10x smaller base LR, and the schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    100, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Short fresh run (epoch budget 100): new model (last constructor argument 2),
# Adam with a 10x smaller base LR, and the schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    100, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Short fresh run (epoch budget 100): new model (last constructor argument 2),
# Adam with a 10x smaller base LR, and the schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    100, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Short fresh run (epoch budget 100): new model (last constructor argument 2),
# Adam with a 10x smaller base LR, and the schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    100, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model (last constructor argument 2), Adam at the full base LR,
# and the warm-up/decay schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model (last constructor argument 8), Adam with a 100x smaller
# base LR, and the warm-up/decay schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 8).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.01)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
def custom_lr_lambda(step, num_step_threshold=100, decay=0.99995):
    """Return the LR multiplier for ``LambdaLR``: linear warm-up, then exponential decay.

    Args:
        step: Step counter supplied by the scheduler.
        num_step_threshold: Length of the linear warm-up, in scheduler steps.
        decay: Multiplicative decay applied per step once warm-up is over.

    Returns:
        ``step / num_step_threshold`` while ``step < num_step_threshold``;
        afterwards ``decay ** (step - num_step_threshold)`` (exactly 1.0 at
        the threshold).
    """
    if step < num_step_threshold:
        return step / num_step_threshold
    # Deliberately side-effect free (no printing) so it is safe to call on every step.
    return decay ** (step - num_step_threshold)

# Fresh run: new model (last constructor argument 10), Adam with a 100x smaller
# base LR, and the warm-up/decay schedule defined above in this cell.
start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 10).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=0.001 * 0.01)
scheduler = LambdaLR(optimizer, custom_lr_lambda)

# The five numeric CustomLoss arguments are all 0.0 in this run.
criterion = CustomLoss(
    nn.CrossEntropyLoss(),
    0.0, 0.0, 0.0, 0.0, 0.0,
)
evaluate_model(
    model, custom_train_loader, criterion, optimizer,
    1000, scheduler, batch_size, num_features,
    early_stopping_patience=10000,
)

print("Execution time: {:.6f} seconds".format(time.time() - start_time))

In [None]:
# Count scalar entries across parameters that take part in gradient updates.
total_params = 0
for p in model.parameters():
    if p.requires_grad:
        total_params += p.numel()
print(f"Total number of trainable parameters: {total_params}")

In [None]:
# Count scalar entries across every parameter, whether or not it requires grad.
total_params = 0
for p in model.parameters():
    total_params += p.numel()
print(f"Total number of parameters (trainable + non-trainable): {total_params}")

In [None]:
# Dump each named parameter: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

In [None]:
# Build the test-set feature frame X: drop the id, add missing-value flags,
# impute with the medians held in median_emp_length / median_int_rate,
# one-hot encode, then apply log / sqrt transforms to selected columns.
data = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')

data = data.drop(["id"], axis=1)
data['source'] = 0

# Record which rows were missing before the values are filled in below.
data['person_emp_length_missing'] = data['person_emp_length'].isna().astype(int)
data['loan_int_rate_missing'] = data['loan_int_rate'].isna().astype(int)

data['person_emp_length'] = data['person_emp_length'].fillna(median_emp_length)
data['loan_int_rate'] = data['loan_int_rate'].fillna(median_int_rate)

# Disabled ordinal encodings, kept for reference; the categorical columns are
# one-hot encoded by pd.get_dummies below instead.
# grade_mapping = {'A': 7, 'B': 6, 'C': 5, 'D': 4, 'E': 3, 'F': 2, 'G': 1}
# data['loan_grade'] = data['loan_grade'].map(grade_mapping)

# purpose_mapping = {
#     'DEBTCONSOLIDATION': 1,
#     'HOMEIMPROVEMENT': 2,
#     'MEDICAL': 3,
#     'PERSONAL': 4,
#     'EDUCATION': 5,
#     'VENTURE': 6
# }
# data['loan_intent'] = data['loan_intent'].map(purpose_mapping)

# home_ownership_mapping = {
#     'OWN': 1,
#     'MORTGAGE': 2,
#     'OTHER': 3,
#     'RENT': 4
# }
# data['person_home_ownership'] = data['person_home_ownership'].map(home_ownership_mapping)

# Work on a copy so `data` keeps the pre-encoding columns for the null report below.
X = data.copy()
# NOTE(review): dummies are derived from the test file alone; confirm the
# resulting columns match the ones produced for the training data.
X = pd.get_dummies(X, drop_first=True)

column_to_log = [
    'person_age',
    'person_income',
]

column_to_sqrt = [
    'person_emp_length',
    'loan_percent_income',
]

for col in column_to_log:
    if (X[col] <= 0).any():
        print(f"Column '{col}' contains non-positive values. Adding 1 to avoid log of non-positive numbers.")
        X[col] = np.log(X[col] + 1)
    else:
        X[col] = np.log(X[col])

for col in column_to_sqrt:
    if (X[col] < 0).any():
        # The message now matches the code: negatives are clipped to 0, not set to NaN.
        print(f"Column '{col}' contains negative values. Clipping negative values to 0 before applying sqrt.")
    # Clipping at 0 leaves non-negative values untouched, so one code path covers both cases.
    X[col] = np.sqrt(X[col].clip(lower=0))

print(data.isnull().sum())
print(X.columns)
print(X.columns.get_loc('source'))

In [None]:
# Show the current contents of X (the test feature frame) for a visual check.
print(X)

In [None]:
# Scale the test features with x_scaler. Only transform() is called here (no
# fit), so the scaler is presumably fitted earlier in the notebook.
# NOTE(review): pd.get_dummies was applied to the test frame on its own; confirm
# X has the same columns, in the same order, as the data x_scaler was fitted on.
print(X.shape)
X_scaled_test = x_scaler.transform(X)
print(X_scaled_test.shape)
print(X_scaled_test)

In [None]:
# Run the model on the scaled test features in a single forward pass.
X_scaled_test_tensor = torch.tensor(X_scaled_test).float().to(device)
# Inference only: disable autograd so no graph is kept for the whole test set.
# NOTE(review): the model's train/eval mode is not changed here; confirm whether
# model.eval() is needed for this architecture.
with torch.no_grad():
    outputs = model(X_scaled_test_tensor)
print(outputs)

In [None]:
# Normalise the model outputs across dim 1 so each row sums to 1.
probabilities = outputs.softmax(dim=1)
print(probabilities)

In [None]:
# Column 1 of the softmax output; a later cell writes these values to the
# submission's 'loan_status' column.
positive_class_probs = probabilities[:, 1]
print(positive_class_probs)

In [None]:
import pandas as pd

# Re-read the test file only to recover the 'id' column, which was dropped
# when the feature frame was built.
test_df = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')
ids = test_df['id']

# torch.as_tensor accepts both a tensor (first run) and the numpy array left
# behind by a previous run, so re-executing this cell no longer raises.
positive_class_probs = torch.as_tensor(positive_class_probs).detach().cpu().numpy()

submission_df = pd.DataFrame({
    'id': ids,
    'loan_status': positive_class_probs
})

submission_df.to_csv('submission.csv', index=False)
print("Submission file created successfully.")
print("Submission file created successfully.")