In [5]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import logit
from scipy.stats import norm

import tensorflow as tf
from keras import layers, models, datasets

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchinfo import summary
from torch.optim.lr_scheduler import StepLR
import torch.autograd.profiler as profiler

from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression

import sys
import time
from learnable_activation import LearnableActivation

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
pip install learnable-activation==0.0.1

  pid, fd = os.forkpty()


Collecting learnable-activation==0.0.1
  Downloading learnable_activation-0.0.1-py3-none-any.whl.metadata (5.2 kB)
Downloading learnable_activation-0.0.1-py3-none-any.whl (5.0 kB)
Installing collected packages: learnable-activation
Successfully installed learnable-activation-0.0.1
Note: you may need to restart the kernel to use updated packages.


An interpolation-based learning technique, driven by explicit regularization

In [6]:
def calculate_metrics(model, data_tensor, labels_tensor, batch_size=1024, num_features=54):
    """Return ``(accuracy, weighted_f1)`` of ``model`` on the given tensors.

    The data is processed in consecutive slices of ``batch_size`` rows with
    gradients disabled. Each slice is reshaped to ``(-1, num_features)`` before
    it is fed to the model, and the predicted class is the index of the largest
    output along dimension 1.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        data_tensor: Input samples, sliced along the first dimension.
        labels_tensor: Ground-truth class indices aligned with ``data_tensor``.
        batch_size: Number of rows per forward pass.
        num_features: Width used when reshaping each slice.

    Returns:
        Tuple ``(accuracy, f1)`` where ``f1`` uses ``average='weighted'``.
    """
    model.eval()
    predicted, expected = [], []
    total = len(data_tensor)

    with torch.no_grad():
        for offset in range(0, total, batch_size):
            stop = min(offset + batch_size, total)
            batch = data_tensor[offset:stop].view(-1, num_features)
            targets = labels_tensor[offset:stop]

            # torch.max over dim 1 returns (values, indices); the indices are the class ids.
            class_ids = torch.max(model(batch), 1)[1]
            predicted.extend(class_ids.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    accuracy = accuracy_score(expected, predicted)
    f1 = f1_score(expected, predicted, average='weighted')
    return accuracy, f1

In [7]:
class CustomDataLoader:
    """Hold a fixed train/validation split as tensors on the global ``device``.

    The split is made once with ``train_test_split`` using ``random_state=42``.
    For classification the split is stratified on ``labels`` and the labels are
    stored as ``long`` tensors; otherwise the split is unstratified and the
    labels are stored as ``float`` tensors. Features are always stored as
    ``float`` tensors.

    Attributes set on the instance: ``train_data_tensor``, ``val_data_tensor``,
    ``train_labels_tensor`` and ``val_labels_tensor``.
    """

    def __init__(self, features, labels, validation_size=0.2, is_classification=True):
        split_options = {"test_size": validation_size, "random_state": 42}
        if is_classification:
            # Keep the class proportions equal in both partitions.
            split_options["stratify"] = labels

        train_x, val_x, train_y, val_y = train_test_split(features, labels, **split_options)

        def as_label_tensor(values):
            tensor = torch.tensor(values)
            tensor = tensor.long() if is_classification else tensor.float()
            return tensor.to(device)

        self.train_labels_tensor = as_label_tensor(train_y)
        self.val_labels_tensor = as_label_tensor(val_y)

        self.train_data_tensor = torch.tensor(train_x).float().to(device)
        self.val_data_tensor = torch.tensor(val_x).float().to(device)

In [8]:
def evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs, scheduler, batch_size=1024, num_features=54, is_classification=True):
    """Train ``model`` and print losses (and classification metrics) after every epoch.

    Despite its name, this function runs the full optimisation loop. Each epoch
    walks the training tensors in order in slices of ``batch_size`` rows, then
    computes the validation loss with gradients disabled.

    Args:
        model: Module to train; each batch is reshaped to ``(-1, num_features)``.
        custom_train_loader: Object exposing ``train_data_tensor``,
            ``train_labels_tensor``, ``val_data_tensor`` and ``val_labels_tensor``.
        criterion: Callable invoked as ``criterion(outputs, labels, model)`` for the
            training loss; its ``regular_loss(outputs, labels)`` method is used for
            the validation loss.
        optimizer: Optimizer stepped once per mini-batch.
        num_epochs: Number of passes over the training data.
        scheduler: Learning-rate scheduler, stepped once per mini-batch.
        batch_size: Rows per mini-batch, for training, validation and metrics.
        num_features: Feature width used when reshaping each batch.
        is_classification: When true, accuracy and weighted F1 are also printed for
            both splits via ``calculate_metrics``.

    Returns:
        None. Progress is reported through ``print``.
    """
    train_data = custom_train_loader.train_data_tensor
    train_labels = custom_train_loader.train_labels_tensor
    val_data = custom_train_loader.val_data_tensor
    val_targets = custom_train_loader.val_labels_tensor

    for epoch in range(num_epochs):
        running_loss = 0.0

        model.train()
        for start_idx in range(0, len(train_data), batch_size):
            end_idx = min(start_idx + batch_size, len(train_data))
            inputs = train_data[start_idx:end_idx].view(-1, num_features)
            labels = train_labels[start_idx:end_idx]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels, model)
            loss.backward()
            optimizer.step()
            # The scheduler advances once per mini-batch, not once per epoch.
            scheduler.step()
            # Weight by the batch size so the epoch average is per-sample.
            running_loss += loss.item() * len(labels)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for start_idx in range(0, len(val_data), batch_size):
                end_idx = min(start_idx + batch_size, len(val_data))
                val_inputs = val_data[start_idx:end_idx].view(-1, num_features)
                val_labels = val_targets[start_idx:end_idx]

                val_outputs = model(val_inputs)
                # Validation uses the plain criterion, without the regularisation terms.
                val_loss += criterion.regular_loss(val_outputs, val_labels).item() * len(val_labels)

        avg_train_loss = running_loss / len(train_data)
        avg_val_loss = val_loss / len(val_data)

        print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')

        if is_classification:
            # Pass by keyword: the fourth positional parameter of calculate_metrics is
            # batch_size, so a positional num_features would land in the wrong slot.
            train_accuracy, train_f1 = calculate_metrics(
                model, train_data, train_labels, batch_size=batch_size, num_features=num_features
            )
            val_accuracy, val_f1 = calculate_metrics(
                model, val_data, val_targets, batch_size=batch_size, num_features=num_features
            )

            print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')
            print(f'Validation Accuracy: {val_accuracy}, Validation F1 Score: {val_f1}')

        print()


In [9]:
class CustomLoss(nn.Module):
    """Base criterion plus weight penalties and activation smoothness penalties.

    The training loss returned by ``forward`` is::

        criterion(outputs, labels)
          + l1_lambda * sum |W|            over the weights of every nn.Linear
          + l2_lambda * sum W^2            over the weights of every nn.Linear
          + f1_lambda * sum |first differences of copy_tensor|
          + f2_lambda * sum |second differences of copy_tensor|

    The last two terms are accumulated over every ``LearnableActivation`` in the
    model, with differences taken along dimension 1 of its ``copy_tensor``.

    Args:
        criterion: Callable ``criterion(outputs, labels)`` giving the data loss.
        l1_lambda: Weight of the L1 penalty on linear weights.
        l2_lambda: Weight of the squared-L2 penalty on linear weights.
        f1_lambda: Weight of the first-difference penalty.
        f2_lambda: Weight of the second-difference penalty.
    """

    def __init__(self, criterion, l1_lambda=0.0, l2_lambda=0.0, f1_lambda=0.0, f2_lambda=0.0):
        super(CustomLoss, self).__init__()
        self.criterion = criterion
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.f1_lambda = f1_lambda
        self.f2_lambda = f2_lambda

    def forward(self, outputs, labels, model):
        """Return the regularised loss for ``outputs`` against ``labels``."""
        # Penalise only the weight matrices. The names yielded by named_modules() are
        # module names, so filtering them on 'bias' never excluded a bias parameter.
        linear_weights = [
            module.weight for module in model.modules() if isinstance(module, nn.Linear)
        ]
        l1_loss = self.l1_lambda * sum(weight.abs().sum() for weight in linear_weights)
        l2_loss = self.l2_lambda * sum(weight.pow(2.0).sum() for weight in linear_weights)

        f1_loss = 0
        f2_loss = 0
        for module in model.modules():
            if isinstance(module, LearnableActivation):
                # copy_tensor is sliced along dim 1 below; presumably each row is one
                # feature's sampled activation -- confirm against learnable_activation.
                copy_tensor = module.copy_tensor

                # Differences between neighbouring entries along dim 1.
                f1_diff = copy_tensor[:, 1:] - copy_tensor[:, :-1]
                f1_loss += self.f1_lambda * f1_diff.abs().sum()

                # Differences of those differences.
                f2_diff = f1_diff[:, 1:] - f1_diff[:, :-1]
                f2_loss += self.f2_lambda * f2_diff.abs().sum()

        return self.criterion(outputs, labels) + l1_loss + l2_loss + f1_loss + f2_loss

    def regular_loss(self, outputs, labels):
        """Return the base criterion only, with no regularisation terms."""
        return self.criterion(outputs, labels)

In [None]:
# Load the forest cover type dataset from the Kaggle input directory.
data = pd.read_csv('/kaggle/input/forest-cover-type-dataset/covtype.csv')
# data = pd.read_csv('/kaggle/input/breast-cancer-wisconsin-data/data.csv')

# NOTE: the two triple-quoted blocks below are bare string expressions, not comments.
# They are evaluated and discarded, so the dropna() call and the ten-column feature
# selection they contain are currently disabled.
"""data = data.dropna()"""

"""
X = data[[
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points"
]]
"""

# print(data.columns)
# X = data[['radius_worst', 'concave points_worst']]
# X = data.drop(["id", "diagnosis", "Unnamed: 32"], axis=1)
# y = data["diagnosis"]
# Features are every column except the target; the target is the Cover_Type column.
X = data.drop(["Cover_Type"], axis=1)
y = data["Cover_Type"]

# One-hot encode the columns get_dummies treats as categorical, dropping the first
# level of each.
X = pd.get_dummies(X, drop_first=True)
# Log-transform every column whose values are all strictly positive; columns that
# contain a zero or a negative value are left unchanged.
for col in X.columns:
    if (X[col] > 0).all():
        X[col] = np.log(X[col])

print(X.shape, y.shape)
print(X.columns)

In [None]:
# Fit the feature scaler on X, then apply it to the same data.
x_scaler = StandardScaler().fit(X)
x_scaled = x_scaler.transform(X)

# Fit the label encoder on y, then convert y to integer class codes.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [10]:
# Synthetic 1-D regression problem: y = cos(4x) sampled at 10000 evenly spaced
# points on [-3, 3].
x_values = np.linspace(-3, 3, num=10000)
y_values = np.cos(4 * x_values)

print(y_values)

[0.84385396 0.84256363 0.84126844 ... 0.84126844 0.84256363 0.84385396]


In [11]:
# class LearnableActivation(nn.Module):
#     def __init__(self, num_features, width=20, density=10):
#         super(LearnableActivation, self).__init__()
#         self.num_features = num_features
#         self.width = width
#         self.density = density
        
#         num_intervals = width * density
#         range_values = torch.linspace(-width / 2, width / 2, num_intervals + 1)
#         self.copy_tensor = nn.Parameter(range_values.repeat(num_features, 1))
#         self.feature_idx = torch.arange(self.num_features).view(1, -1)
        
#     def forward(self, x):
#         scaled_x = (x * self.density) + (self.width * self.density / 2)
        
#         lower_idx = torch.floor(scaled_x).long()
#         lower_idx = torch.clamp(lower_idx, min=0, max=self.copy_tensor.size(1) - 2)
#         upper_idx = lower_idx + 1

#         lower_value = self.copy_tensor[self.feature_idx, lower_idx]
#         upper_value = self.copy_tensor[self.feature_idx, upper_idx]
        
#         interp_factor = (scaled_x - lower_idx.float())
#         interpolated_value = torch.lerp(lower_value, upper_value, interp_factor)
#         return interpolated_value

In [12]:
class TabularDenseNet(nn.Module):
    """Densely connected stack of bias-free linear layers with learnable activations.

    Every hidden layer receives the concatenation of the raw input and all earlier
    layer outputs, passes it through its own ``LearnableActivation`` and then a
    square ``nn.Linear`` (no bias) initialised to the identity matrix. Because each
    layer's output is as wide as its input, the concatenated width doubles after
    every layer. The head applies an activation, a zero-initialised bias-free
    linear layer down to ``output_size``, and a final activation.

    Args:
        input_size: Number of input features.
        output_size: Number of outputs produced per sample.
        num_layers: Number of hidden (activation + linear) layers.
        width: Forwarded to every ``LearnableActivation`` (presumably the input range
            it covers -- confirm against the learnable_activation package).
        density: Forwarded to every ``LearnableActivation`` (presumably sample points
            per unit of width -- confirm against the learnable_activation package).
    """

    def __init__(self, input_size, output_size, num_layers=2, width=20, density=10):
        super(TabularDenseNet, self).__init__()

        self.layers = nn.ModuleList()
        self.activations = nn.ModuleList()

        for _ in range(num_layers):
            self.activations.append(LearnableActivation(input_size, width, density))
            self.layers.append(nn.Linear(input_size, input_size, bias=False))

            # Start each hidden layer as the identity map.
            with torch.no_grad():
                self.layers[-1].weight.copy_(torch.eye(input_size))

            # The next layer sees this layer's input and output concatenated.
            input_size *= 2

        self.activation_second_last_layer = LearnableActivation(input_size, width, density)
        self.last_layer = nn.Linear(input_size, output_size, bias=False)

        # The head starts at zero, so the pre-activation output is initially 0.
        with torch.no_grad():
            self.last_layer.weight.copy_(torch.zeros(output_size, input_size))

        self.activation_last_layer = LearnableActivation(output_size, width, density)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to the network output.

        Returns:
            Tensor of shape ``(batch,)`` when ``output_size == 1``, otherwise
            ``(batch, output_size)``.
        """
        outputs = [x]

        for layer, activation in zip(self.layers, self.activations):
            concatenated_outputs = torch.cat(outputs, dim=1)
            outputs.append(layer(activation(concatenated_outputs)))

        outputs = torch.cat(outputs, dim=1)
        outputs = self.activation_second_last_layer(outputs)
        outputs = self.last_layer(outputs)
        outputs = self.activation_last_layer(outputs)
        # Drop only a trailing singleton output dimension. A bare squeeze() would also
        # remove the batch dimension when the batch holds a single sample.
        return outputs.squeeze(-1)

In [13]:
# NOTE: num_epochs is not read by the training cells in this notebook; they pass
# their own epoch counts to evaluate_model.
num_epochs = 1
num_features = 1
num_classes = 1

# Set to True to dump every parameter tensor right after construction. The original
# loop started with an unconditional `break`, so its print statements never ran.
SHOW_PARAMETERS = False

# One input feature, one output, eight hidden layers.
model = TabularDenseNet(num_features, num_classes, 8, width=20, density=10).to(device)
# MSE data loss; only the first- and second-difference penalties are active (1e-6 each).
criterion = CustomLoss(nn.MSELoss(), l1_lambda=0.001 * 0.0, l2_lambda=0.0, f1_lambda=0.001 * 0.001, f2_lambda=0.001 * 0.001)
custom_train_loader = CustomDataLoader(x_values, y_values, validation_size=0.2, is_classification=False)

if SHOW_PARAMETERS:
    for name, param in model.named_parameters():
        print(f"Layer: {name}")
        print(f"Shape: {param.shape}")
        print(param)

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

Total number of parameters: 125013


In [None]:
# Dump every named parameter of the model: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

In [15]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Adam at lr=0.01. StepLR with step_size=1 multiplies the learning rate by 0.9995 on
# every scheduler.step() call.
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9995)

# torch.cuda.synchronize() raises on machines without CUDA, so only call it when a
# GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size=1024, num_features=num_features, is_classification=False)

# Wait for any queued GPU work so the measured interval covers it.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 6.33522889482975, Validation Loss: 0.8357346439361573

Epoch 2, Training Loss: 0.9096806735992432, Validation Loss: 0.8120481057167053

Epoch 3, Training Loss: 0.45139398431777955, Validation Loss: 0.3436638979911804

Epoch 4, Training Loss: 0.22252323317527772, Validation Loss: 0.10373967450857162

Epoch 5, Training Loss: 0.06764566293358802, Validation Loss: 0.022920332849025726

Epoch 6, Training Loss: 0.024744223058223724, Validation Loss: 0.005965645160526037

Epoch 7, Training Loss: 0.017106633335351944, Validation Loss: 0.0037310792822390793

Epoch 8, Training Loss: 0.014864832885563374, Validation Loss: 0.002122356364503503

Epoch 9, Training Loss: 0.013751370176672936, Validation Loss: 0.001525257327593863

Epoch 10, Training Loss: 0.013214686013758182, Validation Loss: 0.0012122828466817736

Epoch 11, Training Loss: 0.012908970244228839, Validation Loss: 0.0009618541202507914

Epoch 12, Training Loss: 0.012697959661483765, Validation Loss: 0.0008028683

KeyboardInterrupt: 

In [None]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Creating a new optimizer and scheduler here restarts Adam's state and puts the
# learning rate back at 0.01; the model keeps its current weights.
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9995)

# torch.cuda.synchronize() raises on machines without CUDA, so only call it when a
# GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 1000, scheduler, batch_size=1024, num_features=num_features, is_classification=False)

# Wait for any queued GPU work so the measured interval covers it.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Continue training with a larger batch size. The model built in this notebook is the
# 1-feature regression network, so num_features and is_classification are passed
# explicitly: the defaults (54 features, classification) would fail when each batch
# is reshaped with .view(-1, 54).
evaluate_model(model, custom_train_loader, criterion, optimizer, 500, scheduler, 1024 * 16, num_features=num_features, is_classification=False)

In [None]:
# Dump every named parameter of the model: its name on one line, its tensor on the next.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

In [None]:
import torch
import matplotlib.pyplot as plt

# Step 1: Build the evaluation grid: 10000 evenly spaced points from -3 to 3
input_tensor = torch.linspace(-3, 3, 10000).to(device)  # 1-D tensor moved to `device`

# `model` is the network created and trained in the cells above
model.eval()  # Set the model to evaluation mode

# Step 2: Pass the tensor to the model and obtain the result
with torch.no_grad():  # Disable gradient calculation for evaluation
    output = model(input_tensor.unsqueeze(1))  # unsqueeze(1) reshapes the grid to (10000, 1)

# Step 3: Plot the model output against the input grid
plt.figure(figsize=(8, 5))
plt.plot(input_tensor.cpu().numpy(), output.cpu().numpy(), label='Model Output')
plt.xlabel('Input')
plt.ylabel('Output')
plt.title('Model Output vs. Input')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Query the model at the single input x = 0.5, passed as a 1x1 batch.
sample_input = torch.tensor([[0.5]]).to(device)
print(model(sample_input))