In [3]:
import sys
import time
import math
import itertools

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from scipy.special import logit
from scipy.stats import norm

import tensorflow as tf
from keras import layers, models, datasets

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR, LambdaLR
import torch.autograd.profiler as profiler

from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
import torch

batch_size, features = 2, 3
x = torch.arange(batch_size * features, dtype=torch.float).reshape(batch_size, features)

x_i = x.unsqueeze(2).expand(batch_size, features, features)
x_j = x.unsqueeze(1).expand(batch_size, features, features)

pairs_4d = torch.stack([x_i, x_j], dim=-1)
pairs_2d = pairs_4d.view(batch_size, features * features, 2)

print(pairs_4d.shape)
print("Input shape:", x.shape)
print(x)
print("Pairs shape:", pairs_2d.shape)
print("Pairs: ", pairs_2d)

torch.Size([2, 3, 3, 2])
Input shape: torch.Size([2, 3])
tensor([[0., 1., 2.],
        [3., 4., 5.]])
Pairs shape: torch.Size([2, 9, 2])
Pairs:  tensor([[[0., 0.],
         [0., 1.],
         [0., 2.],
         [1., 0.],
         [1., 1.],
         [1., 2.],
         [2., 0.],
         [2., 1.],
         [2., 2.]],

        [[3., 3.],
         [3., 4.],
         [3., 5.],
         [4., 3.],
         [4., 4.],
         [4., 5.],
         [5., 3.],
         [5., 4.],
         [5., 5.]]])


In [5]:
idx = torch.combinations(torch.arange(4), r=2, with_replacement=True)
print(idx.shape)
print(idx)

torch.Size([10, 2])
tensor([[0, 0],
        [0, 1],
        [0, 2],
        [0, 3],
        [1, 1],
        [1, 2],
        [1, 3],
        [2, 2],
        [2, 3],
        [3, 3]])


In [6]:
import torch

B, N = 2, 4
x = torch.randn(B, N)

M = N*(N+1)//2
pairs = torch.rand(B, M, 2)
print(pairs)

pairs_flat = pairs.view(B, -1)
print(pairs_flat.shape)
print(pairs_flat)

pairs_unflat = pairs_flat.view(B, M, 2)
print(pairs_unflat)

tensor([[[0.9495, 0.9842],
         [0.9676, 0.4880],
         [0.7591, 0.9547],
         [0.7442, 0.4111],
         [0.4154, 0.6210],
         [0.5671, 0.7571],
         [0.9082, 0.7438],
         [0.8985, 0.9118],
         [0.6309, 0.3738],
         [0.2816, 0.6959]],

        [[0.6708, 0.9513],
         [0.4167, 0.2213],
         [0.7704, 0.3702],
         [0.5432, 0.5037],
         [0.9517, 0.9136],
         [0.0110, 0.1411],
         [0.6949, 0.5253],
         [0.6954, 0.3850],
         [0.9487, 0.8844],
         [0.5325, 0.6924]]])
torch.Size([2, 20])
tensor([[0.9495, 0.9842, 0.9676, 0.4880, 0.7591, 0.9547, 0.7442, 0.4111, 0.4154,
         0.6210, 0.5671, 0.7571, 0.9082, 0.7438, 0.8985, 0.9118, 0.6309, 0.3738,
         0.2816, 0.6959],
        [0.6708, 0.9513, 0.4167, 0.2213, 0.7704, 0.3702, 0.5432, 0.5037, 0.9517,
         0.9136, 0.0110, 0.1411, 0.6949, 0.5253, 0.6954, 0.3850, 0.9487, 0.8844,
         0.5325, 0.6924]])
tensor([[[0.9495, 0.9842],
         [0.9676, 0.4880],
     

In [7]:
def calculate_metrics(model, data_tensor, labels_tensor, batch_size=1024, num_features=22):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for start_idx in range(0, len(data_tensor), batch_size):
            end_idx = min(start_idx + batch_size, len(data_tensor))
            inputs = data_tensor[start_idx:end_idx].view(-1, num_features)
            labels = labels_tensor[start_idx:end_idx]

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    return accuracy, f1

In [8]:
class CustomDataLoader:
    def __init__(self, features, labels, validation_size=0.2, random_state=42, classification=True):        
        if validation_size > 0.0:
            stratify = labels if classification else None
            train_data, val_data, train_labels, val_labels = train_test_split(
                features, labels, test_size=validation_size, stratify=stratify, random_state=random_state
            )
            
            self.val_data_tensor = torch.tensor(val_data).float().to(device)
            
            if classification:
                self.val_labels_tensor = torch.tensor(val_labels).long().to(device)

            else:
                self.val_labels_tensor =torch.tensor(val_labels).float().to(device)
        else:
            train_data, train_labels = features, labels
            self.val_data_tensor, self.val_labels_tensor = None, None
        
        self.train_data_tensor = torch.tensor(train_data).float().to(device)

        if classification:
            self.train_labels_tensor = torch.tensor(train_labels).long().to(device)
        else:
            self.train_labels_tensor = torch.tensor(train_labels).float().to(device)

        torch.manual_seed(random_state)
        indices = torch.randperm(len(self.train_data_tensor))

        self.train_data_tensor = self.train_data_tensor[indices]
        self.train_labels_tensor = self.train_labels_tensor[indices]

In [9]:
def evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs, scheduler, batch_size=1024, num_features=22, early_stopping_patience=10):
    best_val_loss = float('inf')
    best_epoch = 0
    patience_counter = 0
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()
        i = 0
        total_loss = 0
        num_items = 0

        for start_idx in range(0, len(custom_train_loader.train_data_tensor), batch_size):
            end_idx = min(start_idx + batch_size, len(custom_train_loader.train_data_tensor))
            inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
            labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels, model)
            loss.backward()
            optimizer.step()
            scheduler.step()
            running_loss += loss.item() * len(labels)
            total_loss += loss.item() * len(labels)
            num_items += len(labels)

            i += 1

        if epoch % 10 == 0:
            model.eval()

            train_reg_loss = 0.0
            val_loss = 0.0
            with torch.no_grad():
                for start_idx in range(0, len(custom_train_loader.train_data_tensor), batch_size):
                    end_idx = min(start_idx + batch_size, len(custom_train_loader.train_data_tensor))
                    inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
                    labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]
        
                    outputs = model(inputs)
                    train_reg_loss += criterion.regular_loss(outputs, labels).item() * len(labels)

                for start_idx in range(0, len(custom_train_loader.val_data_tensor), batch_size):
                    end_idx = min(start_idx + batch_size, len(custom_train_loader.val_data_tensor))
                    val_inputs = custom_train_loader.val_data_tensor[start_idx:end_idx].view(-1, num_features)
                    val_labels = custom_train_loader.val_labels_tensor[start_idx:end_idx]
    
                    val_outputs = model(val_inputs)
                    val_loss += criterion.regular_loss(val_outputs, val_labels).item() * len(val_labels)
    
            avg_train_loss = running_loss / len(custom_train_loader.train_data_tensor)
            avg_val_loss = val_loss / len(custom_train_loader.val_data_tensor)
    
            train_accuracy, train_f1 = calculate_metrics(model, custom_train_loader.train_data_tensor, custom_train_loader.train_labels_tensor, batch_size, num_features)
            val_accuracy, val_f1 = calculate_metrics(model, custom_train_loader.val_data_tensor, custom_train_loader.val_labels_tensor, batch_size, num_features)
    
            print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')
            print(f'Epoch {epoch + 1}, Training Loss: {train_reg_loss / len(custom_train_loader.train_data_tensor)}, Validation Loss: {avg_val_loss}')
            print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')
            print(f'Validation Accuracy: {val_accuracy}, Validation F1 Score: {val_f1}')
            for param_group in optimizer.param_groups:
                print("Learning Rate:", param_group['lr'])
                
            print()
            
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                best_epoch = epoch + 1
                patience_counter = 0
            else:
                patience_counter += 10
                if patience_counter >= early_stopping_patience:
                    print(f'Early stopping triggered after {epoch + 1} epochs.')
                    print(f'Best Validation Loss: {best_val_loss} from Epoch {best_epoch}')
                    break

    if patience_counter < early_stopping_patience:
        print(f'Best Validation Loss after {num_epochs} epochs: {best_val_loss} from Epoch {best_epoch}')

In [10]:
data_dl = pd.read_csv('/kaggle/input/playground-series-s4e10/train.csv')
data_og = pd.read_csv('/kaggle/input/loan-approval-prediction/credit_risk_dataset.csv')

data_dl = data_dl.drop(["id"], axis=1)

median_emp_length = data_og['person_emp_length'].median()
median_int_rate = data_og['loan_int_rate'].median()

data_dl['source'] = 0
data_og['source'] = 1

data = pd.concat([data_dl, data_og], ignore_index=True)

In [11]:
data['person_emp_length_missing'] = data['person_emp_length'].isna().astype(int)
data['loan_int_rate_missing'] = data['loan_int_rate'].isna().astype(int)

data['person_emp_length'] = data['person_emp_length'].fillna(median_emp_length)
data['loan_int_rate'] = data['loan_int_rate'].fillna(median_int_rate)

# grade_mapping = {'A': 7, 'B': 6, 'C': 5, 'D': 4, 'E': 3, 'F': 2, 'G': 1}
# data['loan_grade'] = data['loan_grade'].map(grade_mapping)

# purpose_mapping = {
#     'DEBTCONSOLIDATION': 1,
#     'HOMEIMPROVEMENT': 2,
#     'MEDICAL': 3,
#     'PERSONAL': 4,
#     'EDUCATION': 5,
#     'VENTURE': 6
# }
# data['loan_intent'] = data['loan_intent'].map(purpose_mapping)

# home_ownership_mapping = {
#     'OWN': 1,
#     'MORTGAGE': 2,
#     'OTHER': 3,
#     'RENT': 4
# }
# data['person_home_ownership'] = data['person_home_ownership'].map(home_ownership_mapping)

X = data.drop(["loan_status"], axis=1)
X = pd.get_dummies(X, drop_first=True)
y = data["loan_status"]

column_to_log = [
    'person_age',
    'person_income',
]

column_to_sqrt = [
    'person_emp_length',
    'loan_percent_income',
]

for col in column_to_log:
    if (X[col] <= 0).any():
        print(f"Column '{col}' contains non-positive values. Adding 1 to avoid log of non-positive numbers.")
        X[col] = np.log(X[col] + 1)
    else:
        X[col] = np.log(X[col])

for col in column_to_sqrt:
    if (X[col] < 0).any():
        print(f"Column '{col}' contains negative values. Setting negative values to NaN before applying sqrt.")
        X[col] = np.sqrt(X[col].clip(lower=0))
    else:
        X[col] = np.sqrt(X[col])

print(data.isnull().sum())
print(X.columns)
print(X.shape, y.shape)
print(X.columns.get_loc('source'))

person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
loan_status                   0
source                        0
person_emp_length_missing     0
loan_int_rate_missing         0
dtype: int64
Index(['person_age', 'person_income', 'person_emp_length', 'loan_amnt',
       'loan_int_rate', 'loan_percent_income', 'cb_person_cred_hist_length',
       'source', 'person_emp_length_missing', 'loan_int_rate_missing',
       'person_home_ownership_OTHER', 'person_home_ownership_OWN',
       'person_home_ownership_RENT', 'loan_intent_EDUCATION',
       'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL',
       'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_B',
       'loan_grade_C', 'loa

In [12]:
x_scaler = StandardScaler()
x_scaled = x_scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(x_scaled.shape)

(91226, 25)


In [13]:
x_scaled = x_scaler.transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(x_scaled.shape)

(91226, 25)


In [14]:
feature_means = x_scaled.mean(axis=0)
feature_variances = x_scaled.var(axis=0)
feature_mins = x_scaled.min(axis=0)
feature_maxs = x_scaled.max(axis=0)

feature_stats_scaled_full = pd.DataFrame({
    'Mean': feature_means,
    'Variance': feature_variances,
    'Min': feature_mins,
    'Max': feature_maxs
})

print("Mean, Variance, Min, and Max of Scaled Features:")
print(feature_stats_scaled_full)

Mean, Variance, Min, and Max of Scaled Features:
            Mean  Variance       Min        Max
0  -3.289997e-16       1.0 -1.552712   8.591255
1  -1.420680e-16       1.0 -5.315235   9.344027
2   3.987875e-17       1.0 -1.859550   8.913061
3  -1.333446e-16       1.0 -1.513249   4.385625
4   9.327889e-16       1.0 -1.759487   4.065809
5  -5.358707e-16       1.0 -3.245038   4.413749
6   6.480297e-17       1.0 -0.943500   5.989958
7  -1.944089e-16       1.0 -0.745361   1.341632
8  -4.610980e-17       1.0 -0.099539  10.046317
9   6.729539e-17       1.0 -0.188056   5.317578
10  4.673291e-18       1.0 -0.046402  21.550842
11  7.477266e-17       1.0 -0.258691   3.865621
12 -6.698384e-17       1.0 -1.031790   0.969189
13  3.489391e-17       1.0 -0.508188   1.967776
14 -7.103402e-17       1.0 -0.348605   2.868576
15  1.217003e-17       1.0 -0.478658   2.089176
16  3.115527e-19       1.0 -0.453072   2.207155
17  8.801365e-18       1.0 -0.456460   2.190774
18  2.141925e-17       1.0 -0.714835   

In [15]:
class CustomLoss(nn.Module):
    def __init__(self, criterion, f1_lambda, f2_lambda, l1_lambda, l2_lambda, wa_lambda):
        super(CustomLoss, self).__init__()
        self.criterion = criterion
        self.f1_lambda = f1_lambda
        self.f2_lambda = f2_lambda
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.wa_lambda = wa_lambda
        self.i = 0

    def forward(self, outputs, labels, model): 
        f1_loss = 0.0
        f2_loss = 0.0
        l1_loss = 0.0
        l2_loss = 0.0

        # for name, module in model.named_modules():
        #     if isinstance(module, CustomActivation):
        #         f1_loss += (module.a ** 2).sum() + (module.b ** 2).sum()
        #         f2_loss += ((module.a - module.b) ** 2).sum()

        #     if isinstance(module, nn.Linear):
        #         l1_loss += torch.norm(module.weight, 1)
        #         l2_loss += torch.norm(module.weight, 2) ** 2

        total_loss = (self.criterion(outputs, labels)
                      + self.f1_lambda * f1_loss
                      + self.f2_lambda * f2_loss
                      + self.l1_lambda * l1_loss
                      + self.l2_lambda * l2_loss)
        self.i += 1

        return total_loss

    def compute_gradient_magnitude(self, model):
        total_abs_sum = 0.0
        for param in model.parameters():
            if param.grad is not None:
                total_abs_sum += param.grad.abs().sum().item()
        self.grad_magnitude = total_abs_sum

    def regular_loss(self, outputs, labels):
        return self.criterion(outputs, labels)

In [None]:
# class CustomActivation1d_2(nn.Module):
#     def __init__(self, num_features, init_identity=True):
#         super(CustomActivation1d_2, self).__init__()
#         num_control_points = 2
        
#         self.a = nn.Parameter(torch.zeros(num_features, num_control_points))
#         self.b = nn.Parameter(torch.zeros(num_features, num_control_points))

#         self.local_bias = nn.Parameter(torch.zeros(num_features, num_control_points))
#         self.global_bias = nn.Parameter(torch.zeros(1, num_features))

#         with torch.no_grad():
#             random_tensor = torch.randn(num_features) / 2
#             self.a[:, 0] = random_tensor
#             self.b[:, 0] = random_tensor

#             random_tensor = torch.randn(num_features) / 2
#             self.a[:, 1] = random_tensor
#             self.b[:, 1] = random_tensor

#             self.local_bias[:, 0] = norm.ppf(1/3)
#             self.local_bias[:, 1] = norm.ppf(2/3)

#         """
#         with torch.no_grad():
#             if init_identity:
#         """
        
#     def forward(self, x):
#         x = x.unsqueeze(-1) + self.local_bias
#         x = torch.where(x < 0, self.a * x, self.b * x)
#         x = x.sum(dim=-1) + self.global_bias            
#         return x

In [16]:
class CustomActivation1d(nn.Module):
    def __init__(self, num_features, init_identity=False):
        super(CustomActivation1d, self).__init__()
        self.a = nn.Parameter(torch.cat([torch.zeros(25), torch.ones(num_features - 25)]))
        self.b = nn.Parameter(torch.cat([torch.zeros(25), torch.ones(num_features - 25)]))

        random_tensor = torch.randn(num_features)
        self.a = nn.Parameter(random_tensor)
        self.b = nn.Parameter(random_tensor)

        self.a = nn.Parameter(torch.zeros(num_features))
        self.b = nn.Parameter(torch.zeros(num_features))

        self.local_bias = nn.Parameter(torch.zeros(num_features))
        self.global_bias = nn.Parameter(torch.tensor(0.0))

    def forward(self, x):
        x = x + self.local_bias
        x = torch.where(x < 0, self.a * x, self.b * x)
        return x.sum(dim=-1) + self.global_bias

In [17]:
class CustomActivation1d_2(nn.Module):
    def __init__(self, num_features, init_identity=False):
        super(CustomActivation1d_2, self).__init__()
        random_tensor = torch.randn(num_features)
        self.a = nn.Parameter(random_tensor)
        self.b = nn.Parameter(random_tensor)

        self.a = nn.Parameter(torch.zeros(num_features))
        self.b = nn.Parameter(torch.zeros(num_features))

        self.a = nn.Parameter(torch.tensor([1 if i % 2 == 0 else 0 for i in range(num_features)], dtype=torch.float32))
        self.b = nn.Parameter(torch.tensor([1 if i % 2 == 0 else 0 for i in range(num_features)], dtype=torch.float32))

        self.local_bias = nn.Parameter(torch.zeros(num_features))
        self.global_bias = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
        x = x + self.local_bias
        x = torch.where(x < 0, self.a * x, self.b * x)
        return x + self.global_bias

In [33]:
import torch

# Example shapes
batch_size = 4
x_features = 3
y_features = 5

# Random tensors
x = torch.rand(batch_size, x_features)  # Shape (batch_size, x_features)
y = torch.rand(batch_size, y_features)  # Shape (batch_size, y_features)

# Compute the outer product for each batch
# Add an extra dimension to align dimensions for broadcasting
outer_product = x.unsqueeze(2) * y.unsqueeze(1)  # Shape (batch_size, x_features, y_features)

# Reshape to (batch_size, x_features * y_features)
result = outer_product.view(batch_size, -1)

# Resulting shape
print(result.shape)  # (batch_size, x_features * y_features)


torch.Size([4, 15])


In [None]:
class CustomActivation(nn.Module):
    def __init__(self, num_features):
        super(CustomActivation, self).__init__()
        self.a = nn.Parameter(torch.zeros(num_features, num_features))
        self.b = nn.Parameter(torch.zeros(num_features, num_features))
        
        self.local_bias = nn.Parameter(torch.zeros(num_features, num_features))
        self.global_bias = nn.Parameter(torch.zeros(num_features))

        with torch.no_grad():
            self.a.fill_diagonal_(1)
            self.b.fill_diagonal_(1)
            
    def forward(self, x):
        batch_size, num_features = x.shape
        x = x.unsqueeze(-1).expand(-1, -1, num_features)
        x = x + self.local_bias
        x = torch.where(x < 0, self.a * x, self.b * x)
        return x.sum(dim=1) + self.global_bias

In [None]:
class CustomLinear(nn.Module):
    def __init__(self, num_features, num_outputs, init_identity=False):
        super(CustomLinear, self).__init__()
        
        if init_identity and num_features != num_outputs:
            raise ValueError("For identity initialization, num_features must equal num_outputs.")

        self.linear = nn.Linear(num_features, num_outputs, bias=True)
        
        with torch.no_grad():
            self.linear.bias.zero_()

            if init_identity:
                self.linear.weight.copy_(torch.eye(num_features, num_outputs))
            else:
                self.linear.weight.zero_()

    def forward(self, x):
        return self.linear(x)

In [None]:
"""
class TabularDenseNet(nn.Module):
    def __init__(self, input_size, output_size, num_control_points, num_layers, window_size):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()
        
        if num_layers % 2 == 1:
            self.layers.append(CustomLinear(input_size, input_size, init_identity=True))
            # self.layers.append(CustomActivation(input_size, window_size, num_control_points, init_identity=True))
            num_layers -= 1
            input_size *= 2
            
        for i in range(num_layers):
            if i % 2 == 0:
                self.layers.append(CustomLinear(input_size, input_size, init_identity=True))
            else:
                # self.layers.append(CustomActivation(input_size, window_size, num_control_points, init_identity=True))
                self.layers.append(CustomLinear(input_size, input_size, init_identity=True))

            input_size *= 2

        self.final = CustomLinear(input_size, output_size, init_identity=False)
        self.final_act = CustomActivation(output_size, window_size, num_control_points, init_identity=True)
        
    def forward(self, x):
        outputs = [x]

        for layer in self.layers:
            concatenated_outputs = torch.cat(outputs, dim=-1)
            outputs.append(F.relu(layer(concatenated_outputs)))

        concatenated_outputs = torch.cat(outputs, dim=-1)
        x = self.final(concatenated_outputs)
        x = self.final_act(x)
        return x
"""
print("")

In [None]:
class TabularDenseNet(nn.Module):
    def __init__(self, input_size, output_size, num_control_points, num_layers):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(num_layers):
            self.layers.append(CustomActivation(input_size))
            input_size *= 2

        self.final_layer = CustomActivation1d(input_size)

    def forward(self, x):
        outputs = [x]

        for layer in self.layers:
            concatenated_outputs = torch.cat(outputs, dim=-1)
            outputs.append(layer(concatenated_outputs))

        final_out = self.final_layer(torch.cat(outputs, dim=-1))
        return torch.stack([final_out, -final_out], dim=-1)

    # def forward(self, x):
    #     outputs = [x]
    #     summed_total = torch.zeros(x.size(0), device=x.device)
    #     summed_total += self.bias

    #     for layer, layer_norm in zip(self.layers, self.layer_norms):
    #         concatenated_outputs = torch.cat(outputs, dim=-1)
    #         inter_features = layer_norm(layer(concatenated_outputs))
    #         summed_feature = inter_features.sum(dim=-1)

    #         outputs.append(inter_features)
    #         outputs.append(summed_feature.unsqueeze(-1))
    #         summed_total += summed_feature

    #     print(torch.cat(outputs, dim=-1).shape)
    #     return torch.stack([summed_total, -summed_total], dim=-1)

    # def forward(self, x):
    #     batch_size = x.size(0)
    #     device = x.device
    #     L = self.num_layers
    #     D = self.input_dim

    #     total_features = 6656
    #     concatenated_features = torch.zeros(batch_size, total_features, device=device)

    #     concatenated_features[:, :D] = x

    #     current_pos = D
    #     summed_total = self.bias.expand(batch_size)

    #     for i, (layer, layer_norm) in enumerate(zip(self.layers, self.layer_norms)):
    #         current_features = concatenated_features[:, :current_pos]
    #         inter_features = (layer(current_features))
    #         summed_feature = inter_features.sum(dim=-1, keepdim=True)

    #         concatenated_features[:, current_pos:current_pos + current_pos] = inter_features
    #         concatenated_features[:, current_pos + current_pos + 1] = summed_feature.squeeze(-1)

    #         current_pos += current_pos + 1
    #         summed_total = summed_total + summed_feature.squeeze(-1)

    #     return torch.stack([summed_total, -summed_total], dim=-1)

In [47]:
class TabularDenseNet(nn.Module):
    def __init__(self, input_size, output_size, num_layers):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()

        input_size += 625
        input_size += input_size * 25

        self.final_weight = nn.Parameter(torch.zeros(input_size))
        self.bias = nn.Parameter(torch.tensor(0.0))

    def forward(self, x):
        outputs = [x]

        for i in range (2):
            concatenated_outputs = torch.cat(outputs, dim=-1)
            outer_product = concatenated_outputs.unsqueeze(2) * x.unsqueeze(1)
            result = outer_product.view(x.size(0), -1)
            outputs.append(result)

        final_out = ((torch.cat(outputs, dim=-1) * self.final_weight).sum(dim=-1) + self.bias)
        return torch.stack([final_out, -final_out], dim=-1)

In [52]:
class TabularDenseNet(nn.Module):
    def __init__(self, input_size, output_size, num_layers):
        super(TabularDenseNet, self).__init__()
        self.layers = nn.ModuleList()

        for i in range(num_layers):
            input_size += ((input_size + 1) * input_size) // 2

        self.final_layer = nn.Linear(input_size, output_size, bias=True)
        nn.init.zeros_(self.final_layer.weight)
        nn.init.zeros_(self.final_layer.bias)

    def forward(self, x):
        outputs = [x]

        for _ in range(1):  # (loop is unnecessary, but keeping it as per your code)
            concatenated_outputs = torch.cat(outputs, dim=-1)  # [batch, features]

            # Generate pairs manually instead of using torch.combinations
            batch_size, feature_size = concatenated_outputs.shape
            idx = torch.triu_indices(feature_size, feature_size, offset=0)  # Upper triangle indices
            pairs_first = concatenated_outputs[:, idx[0]]  # Select first elements
            pairs_second = concatenated_outputs[:, idx[1]]  # Select second elements
            
            result = pairs_first * pairs_second  # Element-wise multiplication
            outputs.append(result)

        final_out = self.final_layer(torch.cat(outputs, dim=-1))
        return final_out

In [None]:
# import torch
# import torch.nn as nn

# # 1. Create a random input tensor of shape (batch_size, num_features).
# batch_size = 2
# num_features = 4
# concatenated_outputs = torch.randn(batch_size, num_features)
# print("concatenated_outputs:", concatenated_outputs.shape)

# # 2. Generate all (i, j) combinations with i <= j.
# idx = torch.combinations(torch.arange(num_features), r=2, with_replacement=True)
# # idx has shape (M, 2), where M = num_features * (num_features + 1) // 2
# print("idx shape:", idx.shape)

# # 3. Gather the pairs. pairs has shape (B, M, 2).
# pairs = concatenated_outputs[:, idx]  
# print("pairs shape:", pairs.shape)
# print(pairs)

# # 4. Flatten from (B, M, 2) -> (B, M*2).
# pairs_flat = pairs.view(pairs.size(0), -1)  
# print("pairs_flat shape:", pairs_flat.shape)

# # 5. Reshape (unflatten) back to (B, M, 2).
# pairs_unflat = pairs_flat.view(pairs.size(0), pairs.size(1), 2)
# print("pairs_unflat shape:", pairs_unflat.shape)
# print(pairs_unflat)

# # 6. Sum across the last dimension to get (B, M).
# summed_output = pairs_unflat.sum(dim=-1)
# print("summed_output shape:", summed_output.shape)
# print("summed_output:", summed_output)


In [52]:
custom_train_loader = CustomDataLoader(x_scaled, y_encoded, validation_size=0.2, random_state=0, classification=True)
print(custom_train_loader.train_data_tensor.shape)

torch.Size([72980, 25])


In [53]:
num_features = 25
num_classes = 2
num_epochs = 10000
batch_size = 72980 * 1

In [54]:
torch.cuda.empty_cache()

In [55]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1).to(device)

optimizer = optim.Rprop(model.parameters(), lr=0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 2000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

Epoch 1, Training Loss: 0.6931450963020325, Validation Loss: 0.692583441734314
Epoch 1, Training Loss: 0.6925780177116394, Validation Loss: 0.692583441734314
Training Accuracy: 0.80841326390792, Training F1 Score: 0.8229904065941791
Validation Accuracy: 0.8094376849720487, Validation F1 Score: 0.8239895162241755
Learning Rate: 1.0000000000000002e-20

Epoch 11, Training Loss: 0.6803534626960754, Validation Loss: 0.6778711080551147
Epoch 11, Training Loss: 0.6776260137557983, Validation Loss: 0.6778711080551147
Training Accuracy: 0.8151137297889833, Training F1 Score: 0.828331048459051
Validation Accuracy: 0.8153019839964923, Validation F1 Score: 0.8286464457730224
Learning Rate: 1.1000000000000004e-19

Epoch 21, Training Loss: 0.6211879253387451, Validation Loss: 0.6118537187576294
Epoch 21, Training Loss: 0.609385073184967, Validation Loss: 0.6118537187576294
Training Accuracy: 0.8429432721293505, Training F1 Score: 0.8512029316641893
Validation Accuracy: 0.8400745368847967, Validation

In [79]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1).to(device)

optimizer = optim.Rprop(model.parameters(), lr=0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 2000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

Epoch 1, Training Loss: 0.6931450963020325, Validation Loss: 0.6931193470954895
Epoch 1, Training Loss: 0.6931175589561462, Validation Loss: 0.6931193470954895
Training Accuracy: 0.8623184434091532, Training F1 Score: 0.8428494257227417
Validation Accuracy: 0.8647374767072235, Validation F1 Score: 0.8461320946083078
Learning Rate: 1.0000000000000002e-20

Epoch 11, Training Loss: 0.6924352645874023, Validation Loss: 0.6922627091407776
Epoch 11, Training Loss: 0.6922658085823059, Validation Loss: 0.6922627091407776
Training Accuracy: 0.8623184434091532, Training F1 Score: 0.8428494257227417
Validation Accuracy: 0.8647374767072235, Validation F1 Score: 0.8461320946083078
Learning Rate: 1.1000000000000004e-19

Epoch 21, Training Loss: 0.6880935430526733, Validation Loss: 0.6870555281639099
Epoch 21, Training Loss: 0.6870748400688171, Validation Loss: 0.6870555281639099
Training Accuracy: 0.8622773362565086, Training F1 Score: 0.842815759969005
Validation Accuracy: 0.864847089773101, Valida

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1).to(device)

optimizer = optim.Rprop(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
torch.set_printoptions(sci_mode=False, precision=4)

for name, param in model.named_parameters():
    print(name)
    print(param)

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1500, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [56]:
torch.set_printoptions(sci_mode=False, precision=4, threshold=float('inf'))

for name, param in model.named_parameters():
    print(name)
    print(param)

final_weight
Parameter containing:
tensor([     0.0012,      0.0214,      0.0035,     -0.0116,     -0.0081,
             0.0168,      0.0008,     -0.0155,     -0.0101,     -0.0168,
            -0.0081,     -0.0026,      0.0085,     -0.0214,     -0.0143,
            -0.0078,     -0.0024,     -0.0051,     -0.0119,     -0.0113,
            -0.0286,     -0.0089,     -0.0069,     -0.0896,     -0.0104,
            -0.0089,     -0.0084,      0.0017,     -0.0163,      0.0038,
            -0.0068,     -0.0034,      0.0020,      0.0037,     -0.0068,
            -0.0036,      0.0061,     -0.0002,     -0.0133,      0.0085,
            -0.0041,     -0.0011,      0.0010,      0.0077,     -0.0027,
            -0.0016,      0.0007,     -0.0010,     -0.0010,      0.0077,
            -0.0084,      0.0717,     -0.0116,      0.0085,      0.0019,
             0.1308,      0.0193,      0.1563,     -0.0006,      0.0010,
            -0.0104,      0.0021,     -0.0751,     -0.0101,      0.0016,
             0.0

In [None]:
val_loss = 0.0
with torch.no_grad():
    for start_idx in range(0, len(custom_train_loader.train_data_tensor), batch_size):
        end_idx = min(start_idx + batch_size, len(custom_train_loader.train_data_tensor))
        inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
        labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]

        outputs = model(inputs)

    for start_idx in range(0, len(custom_train_loader.val_data_tensor), batch_size):
        end_idx = min(start_idx + batch_size, len(custom_train_loader.val_data_tensor))
        val_inputs = custom_train_loader.val_data_tensor[start_idx:end_idx].view(-1, num_features)
        val_labels = custom_train_loader.val_labels_tensor[start_idx:end_idx]

        val_outputs = model(val_inputs)
        val_loss += criterion.regular_loss(val_outputs, val_labels).item() * len(val_labels)

avg_val_loss = val_loss / len(custom_train_loader.val_data_tensor)
val_accuracy, val_f1 = calculate_metrics(model, custom_train_loader.val_data_tensor, custom_train_loader.val_labels_tensor, batch_size, num_features)

print(avg_val_loss)
print(val_accuracy)
print(val_f1)

In [None]:
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if param.requires_grad:
        weights = param.data
        max_val, flat_index = torch.max(torch.abs(weights).view(-1), 0)
        if max_val > max_magnitude:
            max_magnitude = max_val
            max_param_name = name
            max_param_index = flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name == max_param_name:
            max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
            param.data[max_param_coords] = 0
            print(f"Modified parameter: {name}")
            print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

In [None]:
val_loss = 0.0
with torch.no_grad():
    for start_idx in range(0, len(custom_train_loader.train_data_tensor), batch_size):
        end_idx = min(start_idx + batch_size, len(custom_train_loader.train_data_tensor))
        inputs = custom_train_loader.train_data_tensor[start_idx:end_idx].view(-1, num_features)
        labels = custom_train_loader.train_labels_tensor[start_idx:end_idx]

        outputs = model(inputs)

    for start_idx in range(0, len(custom_train_loader.val_data_tensor), batch_size):
        end_idx = min(start_idx + batch_size, len(custom_train_loader.val_data_tensor))
        val_inputs = custom_train_loader.val_data_tensor[start_idx:end_idx].view(-1, num_features)
        val_labels = custom_train_loader.val_labels_tensor[start_idx:end_idx]

        val_outputs = model(val_inputs)
        val_loss += criterion.regular_loss(val_outputs, val_labels).item() * len(val_labels)

avg_val_loss = val_loss / len(custom_train_loader.val_data_tensor)
val_accuracy, val_f1 = calculate_metrics(model, custom_train_loader.val_data_tensor, custom_train_loader.val_labels_tensor, batch_size, num_features)

print(avg_val_loss)
print(val_accuracy)
print(val_f1)

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [None]:
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if param.requires_grad:
        weights = param.data
        max_val, flat_index = torch.max(torch.abs(weights).view(-1), 0)
        if max_val > max_magnitude:
            max_magnitude = max_val
            max_param_name = name
            max_param_index = flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name == max_param_name:
            max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
            param.data[max_param_coords] = 0
            print(f"Modified parameter: {name}")
            print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [None]:
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if param.requires_grad:
        weights = param.data
        max_val, flat_index = torch.max(torch.abs(weights).view(-1), 0)
        if max_val > max_magnitude:
            max_magnitude = max_val
            max_param_name = name
            max_param_index = flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name == max_param_name:
            max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
            param.data[max_param_coords] = 0
            print(f"Modified parameter: {name}")
            print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [70]:
max_magnitude = -float("inf")
max_param_name = None
max_param_index = None

for name, param in model.named_parameters():
    if param.requires_grad:
        weights = param.data
        max_val, flat_index = torch.max(torch.abs(weights).view(-1), 0)
        if max_val > max_magnitude:
            max_magnitude = max_val
            max_param_name = name
            max_param_index = flat_index

if max_param_name is not None:
    for name, param in model.named_parameters():
        if name == max_param_name:
            max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
            param.data[max_param_coords] = 0
            print(f"Modified parameter: {name}")
            print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

Modified parameter: final_weight
Set the weight at (tensor(15953, device='cuda:0'),) (value 0.6976040601730347) to 0.


In [71]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

Epoch 1, Training Loss: 0.2164945900440216, Validation Loss: 0.30465009808540344
Epoch 1, Training Loss: 0.21637515723705292, Validation Loss: 0.30465009808540344
Training Accuracy: 0.9402302000548095, Training F1 Score: 0.9374103602656251
Validation Accuracy: 0.931711059958347, Validation F1 Score: 0.9289306248027878
Learning Rate: 9.811203997458424e-19

Best Validation Loss after 1 epochs: 0.30465009808540344 from Epoch 1


In [None]:
for i in range(20):
    max_magnitude = -float("inf")
    max_param_name = None
    max_param_index = None
    
    for name, param in model.named_parameters():
        if param.requires_grad:
            weights = param.data
            max_val, flat_index = torch.max(torch.abs(weights).view(-1), 0)
            if max_val > max_magnitude:
                max_magnitude = max_val
                max_param_name = name
                max_param_index = flat_index
    
    if max_param_name is not None:
        for name, param in model.named_parameters():
            if name == max_param_name:
                max_param_coords = torch.unravel_index(max_param_index, param.data.shape)
                param.data[max_param_coords] = 0
                print(f"Modified parameter: {name}")
                print(f"Set the weight at {max_param_coords} (value {max_magnitude}) to 0.")

    evaluate_model(model, custom_train_loader, criterion, optimizer, 1, scheduler, batch_size, num_features, early_stopping_patience=10000)

In [59]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns

weights = model.final_layer.a

print(torch.isinf(weights).any())
print(torch.isnan(weights).any())

weights_numpy = weights.cpu().detach().numpy()
sorted_weights = torch.sort(weights).values

print(sorted_weights)

AttributeError: 'TabularDenseNet' object has no attribute 'final_layer'

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
evaluate_model(model, custom_train_loader, criterion, optimizer, 100000, scheduler, batch_size, num_features, early_stopping_patience=100000000000)

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
for name, param in model.named_parameters():
    print(name)
    print(param)

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99999 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 1).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.1)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 2).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 1.0)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 8).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.01)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [None]:
def custom_lr_lambda(step):
    num_step_threshold = 100

    if step < num_step_threshold:
        return step / num_step_threshold
    if step == num_step_threshold:
        print("here")
    return 0.99995 ** (step - num_step_threshold)

start_time = time.time()
model = TabularDenseNet(num_features, num_classes, 1, 10).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001 * 0.01)
scheduler = LambdaLR(optimizer, lr_lambda=custom_lr_lambda)

criterion = CustomLoss(nn.CrossEntropyLoss(), 0.0, 0.0, 0.0, 0.0, 0.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size, num_features, early_stopping_patience=10000)

print(f"Execution time: {(time.time() - start_time):.6f} seconds")

In [57]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total number of trainable parameters: {total_params}")

Total number of trainable parameters: 16901


In [58]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters (trainable + non-trainable): {total_params}")

Total number of parameters (trainable + non-trainable): 16901


In [None]:
for name, param in model.named_parameters():
    print(name)
    print(param)

In [None]:
data = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')

data = data.drop(["id"], axis=1)
data['source'] = 0

data['person_emp_length_missing'] = data['person_emp_length'].isna().astype(int)
data['loan_int_rate_missing'] = data['loan_int_rate'].isna().astype(int)

data['person_emp_length'] = data['person_emp_length'].fillna(median_emp_length)
data['loan_int_rate'] = data['loan_int_rate'].fillna(median_int_rate)

# grade_mapping = {'A': 7, 'B': 6, 'C': 5, 'D': 4, 'E': 3, 'F': 2, 'G': 1}
# data['loan_grade'] = data['loan_grade'].map(grade_mapping)

# purpose_mapping = {
#     'DEBTCONSOLIDATION': 1,
#     'HOMEIMPROVEMENT': 2,
#     'MEDICAL': 3,
#     'PERSONAL': 4,
#     'EDUCATION': 5,
#     'VENTURE': 6
# }
# data['loan_intent'] = data['loan_intent'].map(purpose_mapping)

# home_ownership_mapping = {
#     'OWN': 1,
#     'MORTGAGE': 2,
#     'OTHER': 3,
#     'RENT': 4
# }
# data['person_home_ownership'] = data['person_home_ownership'].map(home_ownership_mapping)

X = data.drop([], axis=1)
X = pd.get_dummies(X, drop_first=True)

column_to_log = [
    'person_age',
    'person_income',
]

column_to_sqrt = [
    'person_emp_length',
    'loan_percent_income',
]

for col in column_to_log:
    if (X[col] <= 0).any():
        print(f"Column '{col}' contains non-positive values. Adding 1 to avoid log of non-positive numbers.")
        X[col] = np.log(X[col] + 1)
    else:
        X[col] = np.log(X[col])

for col in column_to_sqrt:
    if (X[col] < 0).any():
        print(f"Column '{col}' contains negative values. Setting negative values to NaN before applying sqrt.")
        X[col] = np.sqrt(X[col].clip(lower=0))
    else:
        X[col] = np.sqrt(X[col])

print(data.isnull().sum())
print(X.columns)
print(X.columns.get_loc('source'))

In [None]:
print(X)

In [None]:
print(X.shape)
X_scaled_test = x_scaler.transform(X)
print(X_scaled_test.shape)
print(X_scaled_test)

In [None]:
X_scaled_test_tensor = torch.tensor(X_scaled_test).float().to(device)
outputs = model(X_scaled_test_tensor)
print(outputs)

In [None]:
probabilities = F.softmax(outputs, dim=1)
print(probabilities)

In [None]:
positive_class_probs = probabilities[:, 1]
print(positive_class_probs)

In [None]:
import pandas as pd

test_df = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')
ids = test_df['id']

positive_class_probs = positive_class_probs.cpu().detach().numpy()

submission_df = pd.DataFrame({
    'id': ids,
    'loan_status': positive_class_probs
})

submission_df.to_csv('submission.csv', index=False)
print("Submission file created successfully.")