In [1]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import logit
from scipy.stats import norm

import tensorflow as tf
from keras import layers, models, datasets

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchinfo import summary
from torch.optim.lr_scheduler import StepLR
import torch.autograd.profiler as profiler

from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression

import sys
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

An interpolation-based learning technique, driven by explicit regularization

In [2]:
def calculate_metrics(model, data_tensor, labels_tensor, batch_size=1024):
    """Compute accuracy and weighted F1 of ``model`` over a labelled tensor dataset.

    The data is processed in consecutive slices of ``batch_size`` rows with
    gradients disabled. Each slice is flattened to ``(rows, -1)`` before being
    passed to the model, so the function works for any per-sample feature count
    (the previous version hard-coded 54 features).

    Args:
        model: Module called as ``model(inputs)``; its output is reduced with
            ``torch.max(outputs, 1)`` to obtain predicted class indices.
        data_tensor: Tensor whose first dimension indexes samples.
        labels_tensor: Tensor of class labels aligned with ``data_tensor``.
        batch_size: Number of samples evaluated per forward pass.

    Returns:
        Tuple ``(accuracy, f1)`` where ``f1`` uses ``average='weighted'``.

    Note:
        The model is switched to eval mode and is left in that mode.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for start_idx in range(0, len(data_tensor), batch_size):
            end_idx = min(start_idx + batch_size, len(data_tensor))
            batch = data_tensor[start_idx:end_idx]
            # Flatten everything after the sample dimension instead of assuming 54 features.
            inputs = batch.reshape(len(batch), -1)
            labels = labels_tensor[start_idx:end_idx]

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    return accuracy, f1

In [3]:
class CustomDataLoader:
    """Stratified train/validation split held as tensors on the global ``device``.

    Attributes:
        train_data_tensor / val_data_tensor: features converted with ``.float()``.
        train_labels_tensor / val_labels_tensor: labels converted with ``.long()``.
    """

    def __init__(self, features, labels, validation_size=0.2):
        """Split ``features``/``labels`` and move both partitions to ``device``.

        Args:
            features: Array-like of samples accepted by ``train_test_split``.
            labels: Class labels; also used as the stratification key.
            validation_size: Fraction passed as ``test_size`` to the split.
        """
        # Fixed random_state keeps the split identical between runs.
        split = train_test_split(
            features,
            labels,
            test_size=validation_size,
            stratify=labels,
            random_state=42,
        )
        x_train, x_val, y_train, y_val = split

        self.train_data_tensor = torch.tensor(x_train).float().to(device)
        self.train_labels_tensor = torch.tensor(y_train).long().to(device)

        self.val_data_tensor = torch.tensor(x_val).float().to(device)
        self.val_labels_tensor = torch.tensor(y_val).long().to(device)

In [134]:
def evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs, scheduler=None, batch_size=1024):
    """Train ``model`` for ``num_epochs`` and print loss/metrics after every epoch.

    Despite its name this function performs the optimisation loop. Batches are
    taken as consecutive slices of the training tensors (no shuffling).

    Args:
        model: Module to optimise; called as ``model(inputs)``.
        custom_train_loader: Object exposing ``train_data_tensor``,
            ``train_labels_tensor``, ``val_data_tensor`` and ``val_labels_tensor``.
        criterion: Training loss called as ``criterion(outputs, labels, model)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        num_epochs: Number of passes over the training tensors.
        scheduler: Optional learning-rate scheduler. When given, ``step()`` is
            called after EVERY batch (not every epoch). ``None`` keeps the
            optimizer's learning rate untouched.
        batch_size: Rows per training/validation/metric batch.

    The printed training loss includes whatever regularisation ``criterion``
    adds; the printed validation loss is plain ``nn.CrossEntropyLoss``.
    """
    unregularized_criterion = nn.CrossEntropyLoss()

    train_data = custom_train_loader.train_data_tensor
    train_labels = custom_train_loader.train_labels_tensor
    val_data = custom_train_loader.val_data_tensor
    val_targets = custom_train_loader.val_labels_tensor

    for epoch in range(num_epochs):
        running_loss = 0.0

        model.train()
        for start_idx in range(0, len(train_data), batch_size):
            end_idx = min(start_idx + batch_size, len(train_data))
            batch = train_data[start_idx:end_idx]
            # Flatten per sample rather than assuming a fixed feature count of 54.
            inputs = batch.reshape(len(batch), -1)
            labels = train_labels[start_idx:end_idx]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels, model)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            # Weight by batch length so the epoch average is per sample.
            running_loss += loss.item() * len(labels)

        avg_train_loss = running_loss / len(train_data)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for start_idx in range(0, len(val_data), batch_size):
                end_idx = min(start_idx + batch_size, len(val_data))
                val_batch = val_data[start_idx:end_idx]
                val_inputs = val_batch.reshape(len(val_batch), -1)
                val_labels = val_targets[start_idx:end_idx]

                val_outputs = model(val_inputs)
                val_loss += unregularized_criterion(val_outputs, val_labels).item() * len(val_labels)

        avg_val_loss = val_loss / len(val_data)

        train_accuracy, train_f1 = calculate_metrics(model, train_data, train_labels, batch_size=batch_size)
        val_accuracy, val_f1 = calculate_metrics(model, val_data, val_targets, batch_size=batch_size)

        print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')
        print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')
        print(f'Validation Accuracy: {val_accuracy}, Validation F1 Score: {val_f1}')
        print()


In [132]:
class CustomLoss(nn.Module):
    """Wrap a base criterion and add L1/L2 penalties over non-bias parameters."""

    def __init__(self, criterion, l1_lambda, l2_lambda):
        """Store the base ``criterion`` and the two penalty coefficients."""
        super().__init__()
        self.criterion = criterion
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda

    def forward(self, outputs, labels, model):
        """Return ``criterion(outputs, labels)`` plus the weighted penalties.

        Every parameter of ``model`` whose name does not contain ``'bias'``
        contributes the sum of its absolute values (L1) and the sum of its
        squares (L2).
        """
        weights = [tensor for key, tensor in model.named_parameters() if 'bias' not in key]
        l1_penalty = sum(w.abs().sum() for w in weights)
        l2_penalty = sum(w.pow(2.0).sum() for w in weights)

        total = self.criterion(outputs, labels)
        total += self.l1_lambda * l1_penalty + self.l2_lambda * l2_penalty
        return total

In [6]:
# Load the Forest Cover Type table; "Cover_Type" is the target, every other
# column is used as a feature.
data = pd.read_csv('/kaggle/input/forest-cover-type-dataset/covtype.csv')

X = data.drop(["Cover_Type"], axis=1)
y = data["Cover_Type"]

# One-hot encode any non-numeric columns (first level dropped).
X = pd.get_dummies(X, drop_first=True)

# Log-transform only the columns whose values are all strictly positive, so
# np.log never sees zero or negative input.
for col in X.columns:
    if (X[col] > 0).all():
        X[col] = np.log(X[col])

print(X.shape, y.shape)
print(X.columns)

(581012, 54) (581012,)
Index(['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area1',
       'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4',
       'Soil_Type1', 'Soil_Type2', 'Soil_Type3', 'Soil_Type4', 'Soil_Type5',
       'Soil_Type6', 'Soil_Type7', 'Soil_Type8', 'Soil_Type9', 'Soil_Type10',
       'Soil_Type11', 'Soil_Type12', 'Soil_Type13', 'Soil_Type14',
       'Soil_Type15', 'Soil_Type16', 'Soil_Type17', 'Soil_Type18',
       'Soil_Type19', 'Soil_Type20', 'Soil_Type21', 'Soil_Type22',
       'Soil_Type23', 'Soil_Type24', 'Soil_Type25', 'Soil_Type26',
       'Soil_Type27', 'Soil_Type28', 'Soil_Type29', 'Soil_Type30',
       'Soil_Type31', 'Soil_Type32', 'Soil_Type33', 'Soil_Type34',
       'Soil_Type35', 'Soil_Type36', 'Soil_Type37', 'Soil_Type38',
       'Soi

In [108]:
# Map the raw class labels to integer codes for the loss function.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardise every feature column.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/validation split made in CustomDataLoader, so validation statistics
# leak into the scaling — confirm this is acceptable.
x_scaler = StandardScaler()
x_scaled = x_scaler.fit(X).transform(X)

In [109]:
class TestClass(torch.nn.Module):
    """Single-layer model made of learnable piecewise-linear lookup tables.

    For every (feature, class) pair the module stores ``control_points + 2``
    values. An input value is scaled by ``control_points``, the two
    neighbouring table entries are linearly interpolated, and the
    contributions of all features are summed per class.

    Inputs are expected to lie in ``[0, 1]``: values outside that range yield
    table indices outside ``[0, control_points + 1]``.
    """

    def __init__(self, control_points, num_features, num_classes):
        super(TestClass, self).__init__()
        self.control_points = control_points
        self.num_features = num_features
        self.num_classes = num_classes

        self.copy_tensor = nn.Parameter(torch.zeros(self.num_features, self.num_classes, self.control_points + 2))
        # Index helpers are registered as non-persistent buffers so they follow
        # the module across .to()/.cuda() calls (instead of being pinned to a
        # global device) without adding entries to the state_dict.
        self.register_buffer("feature_idx", torch.arange(self.num_features).view(1, -1, 1), persistent=False)
        self.register_buffer("class_idx", torch.arange(self.num_classes).view(1, 1, -1), persistent=False)

    def forward(self, x):
        """Return per-class sums of interpolated table values.

        Args:
            x: Tensor of shape ``(batch, num_features)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        scaled_x = x * self.control_points

        lower_idx = torch.floor(scaled_x).long()
        upper_idx = lower_idx + 1

        # Broadcasting (1, F, 1) x (1, 1, C) x (B, F, 1) gathers a (B, F, C) block.
        lower_value = self.copy_tensor[self.feature_idx, self.class_idx, lower_idx.unsqueeze(-1)]
        upper_value = self.copy_tensor[self.feature_idx, self.class_idx, upper_idx.unsqueeze(-1)]

        # Fractional position between the two control points; equivalent to
        # lower_value + (upper_value - lower_value) * interp_factor.
        interp_factor = (scaled_x - lower_idx.float()).unsqueeze(-1)
        interpolated_value = torch.lerp(lower_value, upper_value, interp_factor)

        summed_tensor = interpolated_value.sum(dim=1)
        return summed_tensor


In [110]:
class TestClass(torch.nn.Module):
    """Two-stage piecewise-linear lookup model with a sigmoid between stages.

    Stage one maps ``num_features`` inputs to ``num_features * 8`` hidden
    values through ``num_tensors`` interpolation tables (randomly initialised
    in ``[-1, 1)``). Each hidden value is squashed with a sigmoid and rescaled
    by ``control_points`` before the final, zero-initialised table maps it to
    ``num_classes`` outputs.

    Raw inputs are expected to lie in ``[0, 1]`` so that the table indices stay
    inside ``[0, control_points + 1]``.
    """

    def __init__(self, control_points, num_features, num_classes):
        super(TestClass, self).__init__()
        self.control_points = control_points
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_tensors = 1

        self.copy_tensor = nn.Parameter(torch.rand(self.num_tensors, self.num_features, self.num_features * 8, self.control_points + 2) * 2 - 1)
        # Index helpers are non-persistent buffers: they move with the module
        # (no dependency on a global device) and stay out of the state_dict.
        self.register_buffer("feature_idx", torch.arange(self.num_features).view(1, -1, 1), persistent=False)
        self.register_buffer("class_idx", torch.arange(self.num_features * 8).view(1, 1, -1), persistent=False)

        self.final_tensor = nn.Parameter(torch.zeros(self.num_features * 8, self.num_classes, self.control_points + 2))
        self.register_buffer("final_feature_idx", torch.arange(self.num_features * 8).view(1, -1, 1), persistent=False)
        self.register_buffer("final_class_idx", torch.arange(self.num_classes).view(1, 1, -1), persistent=False)

    @staticmethod
    def _interpolate(table, in_idx, out_idx, scaled_x):
        """Interpolate ``table`` at ``scaled_x`` and sum over the input axis."""
        lower_idx = torch.floor(scaled_x).long()
        upper_idx = lower_idx + 1

        lower_value = table[in_idx, out_idx, lower_idx.unsqueeze(-1)]
        upper_value = table[in_idx, out_idx, upper_idx.unsqueeze(-1)]

        interp_factor = (scaled_x - lower_idx.float()).unsqueeze(-1)
        interpolated_value = torch.lerp(lower_value, upper_value, interp_factor)
        return interpolated_value.sum(dim=1)

    def forward(self, x):
        """Map ``(batch, num_features)`` inputs to ``(batch, num_classes)`` outputs."""
        scaled_x = x * self.control_points

        for i in range(self.num_tensors):
            summed_tensor = self._interpolate(self.copy_tensor[i], self.feature_idx, self.class_idx, scaled_x)
            # Sigmoid keeps the hidden values in (0, 1) before rescaling to table coordinates.
            scaled_x = torch.sigmoid(summed_tensor) * self.control_points

        return self._interpolate(self.final_tensor, self.final_feature_idx, self.final_class_idx, scaled_x)

In [11]:
class CustomLayer(nn.Module):
    """Layer of learnable piecewise-linear lookup tables.

    One table of ``control_points + 2`` values is stored for every
    (input feature, output) pair. The forward pass scales each input by
    ``control_points``, linearly interpolates between the two neighbouring
    table entries and sums the contributions of all input features.

    Inputs are expected to lie in ``[0, 1]``; other values produce table
    indices outside ``[0, control_points + 1]``.
    """

    def __init__(self, control_points, num_features, num_outputs):
        super(CustomLayer, self).__init__()

        self.control_points = control_points
        self.num_features = num_features
        self.num_outputs = num_outputs

        self.copy_tensor = nn.Parameter(torch.zeros(self.num_features, self.num_outputs, self.control_points + 2))

        # Non-persistent buffers follow the module across devices (no reliance
        # on a global device variable) and do not appear in the state_dict.
        self.register_buffer("feature_idx", torch.arange(self.num_features).view(1, -1, 1), persistent=False)
        self.register_buffer("output_idx", torch.arange(self.num_outputs).view(1, 1, -1), persistent=False)

    def forward(self, x):
        """Map ``(batch, num_features)`` inputs to ``(batch, num_outputs)`` outputs."""
        scaled_x = x * self.control_points

        lower_idx = torch.floor(scaled_x).long()
        upper_idx = lower_idx + 1

        # Broadcasting (1, F, 1) x (1, 1, O) x (B, F, 1) gathers a (B, F, O) block.
        lower_value = self.copy_tensor[self.feature_idx, self.output_idx, lower_idx.unsqueeze(-1)]
        upper_value = self.copy_tensor[self.feature_idx, self.output_idx, upper_idx.unsqueeze(-1)]

        interp_factor = (scaled_x - lower_idx.float()).unsqueeze(-1)
        interpolated_value = torch.lerp(lower_value, upper_value, interp_factor)

        return interpolated_value.sum(dim=1)

In [91]:
class TestClass(torch.nn.Module):
    """Densely connected stack of ``CustomLayer`` blocks with sigmoid activations.

    Every layer receives the concatenation of all previous layer outputs, so
    the input width doubles at each step: the first layer maps
    ``num_features -> num_features`` and hidden layer ``i`` maps
    ``num_features * 2**i -> num_features * 2**i``. The last layer maps the
    full concatenation to ``num_classes`` outputs without an activation.
    """

    def __init__(self, control_points, num_features, num_classes, num_layers=4):
        super(TestClass, self).__init__()
        self.activation = nn.Sigmoid()
        self.copy_tensor = None

        self.first_layer = CustomLayer(control_points, num_features, num_features)

        self.layers = nn.ModuleList()
        layer_size = num_features
        for i in range(num_layers):
            self.layers.append(CustomLayer(control_points, layer_size, layer_size))
            # The concatenated input of the next layer is twice as wide.
            layer_size *= 2

        self.last_layer = CustomLayer(control_points, layer_size, num_classes)

    def forward(self, x):
        """Run the dense stack and return the output of ``last_layer``.

        The previous version printed the first activation and returned ``None``
        (a debugging leftover), which made the rest of the network unreachable.
        """
        outputs = [self.activation(self.first_layer(x))]

        for layer in self.layers:
            concatenated_outputs = torch.cat(outputs, dim=1)
            outputs.append(self.activation(layer(concatenated_outputs)))

        concatenated_outputs = torch.cat(outputs, dim=1)
        return self.last_layer(concatenated_outputs)


In [92]:
num_epochs = 1
num_features = 54
num_classes = 7

model = TestClass(50, num_features, num_classes).to(device)
# CustomLoss takes l1_lambda / l2_lambda; zero for both disables regularisation.
criterion = CustomLoss(nn.CrossEntropyLoss(), l1_lambda=0.0, l2_lambda=0.0)
custom_train_loader = CustomDataLoader(x_scaled, y_encoded, validation_size=0.2)

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

Total number of parameters: 13354848


In [14]:
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# evaluate_model steps a scheduler after every batch; gamma=1.0 keeps the
# learning rate constant. The batch size must go to `batch_size`, not to the
# `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 10, constant_lr_scheduler, batch_size=1024 * 1)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

1.945911169052124
1.8539522886276245
1.8163913488388062
1.715201497077942
1.6285886764526367
1.5590041875839233
1.4995362758636475
1.434444546699524
1.4196561574935913
1.3613539934158325
1.342638611793518
1.3134902715682983
1.307121753692627
1.256211280822754
1.2345473766326904
1.2597142457962036
1.322450876235962
1.2466729879379272
1.2229632139205933
1.2213397026062012
1.2172123193740845
1.246507167816162
1.267657995223999
1.1504957675933838
1.2170944213867188
1.2137398719787598
1.1827181577682495
1.1604377031326294
1.243660569190979
1.202210783958435
1.1625198125839233
1.206444501876831
1.2000513076782227
1.1536914110183716
1.1840262413024902
1.0890932083129883
1.1023576259613037
1.0915223360061646
1.0756689310073853
1.066365122795105
1.0776621103286743
1.059004783630371
1.0676666498184204
1.0391112565994263
1.0311548709869385
1.02267324924469
1.0441426038742065
1.0317800045013428
0.9669236540794373
1.0050534009933472
1.0003108978271484
0.972579300403595
0.9390429258346558
0.97915685

KeyboardInterrupt: 

In [121]:
class CustomLinearLayer(nn.Module):
    """``nn.Linear`` wrapper with zero bias and a selectable weight initialisation.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        init: ``"zero"`` (all-zero weights), ``"splits_inputs"`` (see
            ``splits_inputs_init``), ``"looks_linear"`` (see
            ``looks_linear_init``). Any other value keeps the default
            ``nn.Linear`` weight initialisation.
    """

    def __init__(self, input_size, output_size, init="default"):
        super(CustomLinearLayer, self).__init__()
        self.linear = nn.Linear(input_size, output_size, bias=True)
        nn.init.zeros_(self.linear.bias)

        if init == "zero":
            nn.init.zeros_(self.linear.weight)
        elif init == "splits_inputs":
            self.splits_inputs_init()
        elif init == "looks_linear":
            self.looks_linear_init()

    def looks_linear_init(self):
        """Fill the weight with 2x2 blocks ``[[1, -1], [-1, 1]]`` on the diagonal.

        Example for a 4x4 layer::

            [[ 1, -1,  0,  0],
             [-1,  1,  0,  0],
             [ 0,  0,  1, -1],
             [ 0,  0, -1,  1]]

        Raises:
            ValueError: If the layer is not square with an even size.
        """
        out_features, in_features = self.linear.weight.shape
        if out_features != in_features or out_features % 2 != 0:
            raise ValueError(
                "looks_linear init requires a square layer with an even size, "
                f"got in_features={in_features}, out_features={out_features}"
            )

        with torch.no_grad():
            weight = torch.zeros(out_features, in_features)

            even = torch.arange(0, out_features, step=2)
            odd = even + 1

            weight[even, even] = 1
            weight[even, odd] = -1
            weight[odd, even] = -1
            weight[odd, odd] = 1

            self.linear.weight.copy_(weight)

    def splits_inputs_init(self):
        """Give input ``i`` a ``+1`` row (``2*i``) and a ``-1`` row (``2*i + 1``), then freeze.

        Example for 3 inputs and 6 outputs::

            [[ 1,  0,  0],
             [-1,  0,  0],
             [ 0,  1,  0],
             [ 0, -1,  0],
             [ 0,  0,  1],
             [ 0,  0, -1]]

        Rows beyond ``2 * in_features`` stay zero. The weight is excluded from
        training afterwards (``requires_grad = False``).

        Raises:
            ValueError: If ``out_features < 2 * in_features``.
        """
        out_features, in_features = self.linear.weight.shape
        if out_features < 2 * in_features:
            raise ValueError(
                "splits_inputs init requires out_features >= 2 * in_features, "
                f"got in_features={in_features}, out_features={out_features}"
            )

        with torch.no_grad():
            weight = torch.zeros(out_features, in_features)

            cols = torch.arange(in_features)
            weight[2 * cols, cols] = 1
            weight[2 * cols + 1, cols] = -1

            self.linear.weight.copy_(weight)

        self.linear.weight.requires_grad = False

    def forward(self, x):
        """Apply the wrapped linear transformation."""
        return self.linear(x)

In [122]:
class customReLU(torch.autograd.Function):
    """ReLU with a custom gradient: 1 for positive inputs, 0.5 at exactly zero, 0 otherwise."""

    @staticmethod
    def forward(ctx, input):
        """Clamp ``input`` at zero from below and remember it for the backward pass."""
        ctx.save_for_backward(input)
        return torch.clamp(input, min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """Pass the gradient where input > 0, half of it where input == 0, zero elsewhere."""
        saved, = ctx.saved_tensors
        nothing = torch.zeros_like(saved)
        at_zero = torch.where(saved == 0, grad_output * 0.5, nothing)
        return torch.where(saved > 0, grad_output, at_zero)


def custom_relu(x):
    """Functional entry point for ``customReLU``."""
    return customReLU.apply(x)

In [123]:
class TabularDenseNet(nn.Module):
    """Densely connected MLP in which each layer sees all earlier activations.

    The first layer doubles the input width using the ``"splits_inputs"``
    initialisation. Each hidden layer is square (``"looks_linear"`` init) and,
    because its output is concatenated with its input, the width doubles for
    the next layer. The final layer starts from all-zero weights.
    """

    def __init__(self, input_size, num_layers, output_size):
        super().__init__()
        self.copy_tensor = None
        # Stored but not used by forward(), which applies custom_relu instead.
        self.activation = nn.GELU()

        base_width = 2 * input_size
        self.first_layer = CustomLinearLayer(input_size, base_width, init="splits_inputs")

        hidden_widths = [base_width * 2 ** depth for depth in range(num_layers)]
        self.layers = nn.ModuleList(
            CustomLinearLayer(width, width, init="looks_linear") for width in hidden_widths
        )

        final_width = base_width * 2 ** num_layers
        self.last_layer = CustomLinearLayer(final_width, output_size, init="zero")

    def forward(self, x):
        """Return the last layer's output for a ``(batch, input_size)`` tensor."""
        features = [custom_relu(self.first_layer(x))]

        for block in self.layers:
            stacked = torch.cat(features, dim=1)
            features.append(custom_relu(block(stacked)))

        return self.last_layer(torch.cat(features, dim=1))

In [129]:
num_epochs = 1
num_features = 54
num_classes = 7

model = TabularDenseNet(num_features, 6, num_classes).to(device)
# CustomLoss takes l1_lambda / l2_lambda; zero for both disables regularisation.
criterion = CustomLoss(nn.CrossEntropyLoss(), l1_lambda=0.0, l2_lambda=0.0)
custom_train_loader = CustomDataLoader(x_scaled, y_encoded, validation_size=0.2)

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

Total number of parameters: 15982495


In [115]:
# Show each parameter's name followed by its current values.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

first_layer.linear.weight
Parameter containing:
tensor([[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0

In [130]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

optimizer = optim.Adam(model.parameters(), lr=0.001)
# evaluate_model calls scheduler.step() after every batch, so with step_size=1
# the learning rate is multiplied by 0.9995 per batch.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9995)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 100, scheduler, 1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.7631811161969053, Validation Loss: 0.6049645809301719
Training Accuracy: 0.7458439918332046, Training F1 Score: 0.7342838693278908
Validation Accuracy: 0.746899821863463, Validation F1 Score: 0.7357112266037402

Epoch 2, Training Loss: 0.5659122141144055, Validation Loss: 0.5301073068942687
Training Accuracy: 0.7727044872194816, Training F1 Score: 0.7662282535716993
Validation Accuracy: 0.7730867533540442, Validation F1 Score: 0.7670259138198642

Epoch 3, Training Loss: 0.49747702820954093, Validation Loss: 0.47047423482296175
Training Accuracy: 0.7997349448913424, Training F1 Score: 0.7967041193827643
Validation Accuracy: 0.7977849108887034, Validation F1 Score: 0.7948715407853291

Epoch 4, Training Loss: 0.44008046138619716, Validation Loss: 0.4296236481197065
Training Accuracy: 0.819790494590251, Training F1 Score: 0.8154495921973002
Validation Accuracy: 0.8181028028536268, Validation F1 Score: 0.8137423959897123

Epoch 5, Training Loss: 0.39515685453323335

In [136]:
# Continue training with a small L1 penalty on the non-bias weights and no L2 term.
criterion = CustomLoss(nn.CrossEntropyLoss(), l1_lambda=0.001 * 0.001, l2_lambda=0.000)
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=5,
    scheduler=scheduler,
    batch_size=1024 * 16,
)

Epoch 1, Training Loss: 0.4068671660701097, Validation Loss: 0.3232626430720744
Training Accuracy: 0.8779971988494198, Training F1 Score: 0.8766222488126303
Validation Accuracy: 0.8740910303520563, Validation F1 Score: 0.8727003746293566

Epoch 2, Training Loss: 0.34153788010821173, Validation Loss: 0.2800840114670676
Training Accuracy: 0.8974869247368273, Training F1 Score: 0.8963194646122477
Validation Accuracy: 0.8922661204960285, Validation F1 Score: 0.8910005880572152

Epoch 3, Training Loss: 0.30741332398555826, Validation Loss: 0.25716636766968054
Training Accuracy: 0.9073167688233231, Training F1 Score: 0.9063392478887443
Validation Accuracy: 0.9018183695773775, Validation F1 Score: 0.9007810831017664

Epoch 4, Training Loss: 0.2856579917746917, Validation Loss: 0.24097073410499256
Training Accuracy: 0.9144207620764658, Training F1 Score: 0.9136148467649793
Validation Accuracy: 0.9088061409774274, Validation F1 Score: 0.907951415023494

Epoch 5, Training Loss: 0.270301559166673

In [137]:
# Five more epochs with the same criterion, optimizer and scheduler.
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=5,
    scheduler=scheduler,
    batch_size=1024 * 16,
)

Epoch 1, Training Loss: 0.25826393838581835, Validation Loss: 0.218076343175137
Training Accuracy: 0.9245991364194756, Training F1 Score: 0.9240301797851345
Validation Accuracy: 0.917136390626748, Validation F1 Score: 0.916487911701331

Epoch 2, Training Loss: 0.24857534973569392, Validation Loss: 0.21029824574821068
Training Accuracy: 0.9275272208584602, Training F1 Score: 0.9270130136884623
Validation Accuracy: 0.9194857275629716, Validation F1 Score: 0.9188864608172448

Epoch 3, Training Loss: 0.24054947699908305, Validation Loss: 0.20218904001611157
Training Accuracy: 0.9313761136294693, Training F1 Score: 0.9309510736961516
Validation Accuracy: 0.9231086977100419, Validation F1 Score: 0.922613686187858

Epoch 4, Training Loss: 0.23273020475105996, Validation Loss: 0.1960009202417145
Training Accuracy: 0.9340481789294097, Training F1 Score: 0.9336565368028746
Validation Accuracy: 0.9251912601223721, Validation F1 Score: 0.924724325807495

Epoch 5, Training Loss: 0.22615871474063906

In [138]:
# Show each parameter's name followed by its current values.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

first_layer.linear.weight
Parameter containing:
tensor([[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0

In [37]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
# evaluate_model steps a scheduler after every batch; gamma=1.0 keeps the
# learning rate constant. The batch size must go to `batch_size`, not to the
# `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 10, constant_lr_scheduler, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 1.0655988254591775, Validation Loss: 0.7915980608723059
Training Accuracy: 0.6703183458151628, Training F1 Score: 0.6367499655522032
Validation Accuracy: 0.6688811820693097, Validation F1 Score: 0.6353570301802609

Epoch 2, Training Loss: 0.7162294657670648, Validation Loss: 0.6859404292588623
Training Accuracy: 0.7093472802807175, Training F1 Score: 0.6910844651502446
Validation Accuracy: 0.7091125013984149, Validation F1 Score: 0.6907912643993278

Epoch 3, Training Loss: 0.6556414300141757, Validation Loss: 0.6401570571434966
Training Accuracy: 0.7271847145816884, Training F1 Score: 0.7134759129666042
Validation Accuracy: 0.7268831269416453, Validation F1 Score: 0.7132260566642362

Epoch 4, Training Loss: 0.6234223745597189, Validation Loss: 0.6195389865843056
Training Accuracy: 0.7335378617883905, Training F1 Score: 0.7173529678995823
Validation Accuracy: 0.7315645895544866, Validation F1 Score: 0.715376273662036

Epoch 5, Training Loss: 0.5997171407423667, V

In [128]:
# Show each parameter's labelled name followed by its current values.
for name, param in model.named_parameters():
    print(f"Parameter Name: {name}", param, sep="\n")

Parameter Name: first_layer.linear.weight
Parameter containing:
tensor([[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  ...,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  .

In [38]:
# evaluate_model steps a scheduler after every batch; gamma=1.0 leaves the
# optimizer's current learning rate unchanged. The batch size must go to
# `batch_size`, not to the `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 20, constant_lr_scheduler, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.46187055407704974, Validation Loss: 0.45659633310192227
Training Accuracy: 0.8091452618172196, Training F1 Score: 0.8031132165885962
Validation Accuracy: 0.8077416245707942, Validation F1 Score: 0.8019075643506941

Epoch 2, Training Loss: 0.4436647745006696, Validation Loss: 0.43283464763916135
Training Accuracy: 0.8202681101269553, Training F1 Score: 0.8187171411119616
Validation Accuracy: 0.8173369018011584, Validation F1 Score: 0.8159239445570963

Epoch 3, Training Loss: 0.4340482068153286, Validation Loss: 0.42634240369568477
Training Accuracy: 0.8242869651835485, Training F1 Score: 0.8208560143068604
Validation Accuracy: 0.8210373226164557, Validation F1 Score: 0.8177703077557913

Epoch 4, Training Loss: 0.4044414708486058, Validation Loss: 0.41213445558972317
Training Accuracy: 0.8296741242101594, Training F1 Score: 0.8255909437985843
Validation Accuracy: 0.8271387141467949, Validation F1 Score: 0.8231474386760982

Epoch 5, Training Loss: 0.3915267355477

In [39]:
optimizer = optim.Adam(model.parameters(), lr=0.0005)
# evaluate_model steps a scheduler after every batch; gamma=1.0 keeps the
# learning rate constant. The batch size must go to `batch_size`, not to the
# `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 10, constant_lr_scheduler, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.4832981421382457, Validation Loss: 0.28052039835777887
Training Accuracy: 0.8896428425439267, Training F1 Score: 0.8880123027005108
Validation Accuracy: 0.8851406590191303, Validation F1 Score: 0.8834208061592084

Epoch 2, Training Loss: 0.24218053046919955, Validation Loss: 0.2405916653315699
Training Accuracy: 0.9111936300717068, Training F1 Score: 0.9106527102036942
Validation Accuracy: 0.9046496217825701, Validation F1 Score: 0.9040364159201201

Epoch 3, Training Loss: 0.22201471403067238, Validation Loss: 0.23294533299483985
Training Accuracy: 0.9152576649763666, Training F1 Score: 0.914926472659158
Validation Accuracy: 0.9086770565303822, Validation F1 Score: 0.9082882072967442

Epoch 4, Training Loss: 0.21588015815661285, Validation Loss: 0.22925650259786215
Training Accuracy: 0.9167980826533049, Training F1 Score: 0.9165233187522076
Validation Accuracy: 0.9102690980439403, Validation F1 Score: 0.9099442322053461

Epoch 5, Training Loss: 0.2122704651673

In [40]:
# evaluate_model steps a scheduler after every batch; gamma=1.0 leaves the
# optimizer's current learning rate unchanged. The batch size must go to
# `batch_size`, not to the `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, constant_lr_scheduler, batch_size=1024 * 16)

Epoch 1, Training Loss: 0.19556368032406754, Validation Loss: 0.21258746848952584
Training Accuracy: 0.9249562723613355, Training F1 Score: 0.9248662991308148
Validation Accuracy: 0.9169987005498997, Validation F1 Score: 0.9168945693979441

Epoch 2, Training Loss: 0.19315933476198208, Validation Loss: 0.20994883616611287
Training Accuracy: 0.9261180398830487, Training F1 Score: 0.9260122439050491
Validation Accuracy: 0.9180657986454739, Validation F1 Score: 0.9179444012113159

Epoch 3, Training Loss: 0.1908943934996064, Validation Loss: 0.20769453433510274
Training Accuracy: 0.9269484885189401, Training F1 Score: 0.9267984641835668
Validation Accuracy: 0.9187198265105032, Validation F1 Score: 0.9185286275904471

Epoch 4, Training Loss: 0.18837908832293598, Validation Loss: 0.20535351621438136
Training Accuracy: 0.9280371077152121, Training F1 Score: 0.92787846666878
Validation Accuracy: 0.9194254881543505, Validation F1 Score: 0.9192230292636936

Epoch 5, Training Loss: 0.1850403207893

In [41]:
# Lower the learning rate of the existing optimizer in place.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0002

# evaluate_model steps a scheduler after every batch; gamma=1.0 leaves the
# learning rate set above unchanged. The batch size must go to `batch_size`,
# not to the `scheduler` position.
constant_lr_scheduler = StepLR(optimizer, step_size=1, gamma=1.0)

# Only synchronise when a GPU is present; the call raises on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, 10, constant_lr_scheduler, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.14379763308671362, Validation Loss: 0.16577850440881653
Training Accuracy: 0.9467265048654393, Training F1 Score: 0.9466305292780136
Validation Accuracy: 0.9354577764773715, Validation F1 Score: 0.9352948471862124

Epoch 2, Training Loss: 0.13807196640882904, Validation Loss: 0.16360953252927946
Training Accuracy: 0.9480066005606604, Training F1 Score: 0.9479556433405829
Validation Accuracy: 0.9364388182749155, Validation F1 Score: 0.9363334707795365

Epoch 3, Training Loss: 0.13655829844459322, Validation Loss: 0.1626027895133869
Training Accuracy: 0.9486670869109677, Training F1 Score: 0.9486290947130912
Validation Accuracy: 0.9370153954717176, Validation F1 Score: 0.9369339159095087

Epoch 4, Training Loss: 0.13536562498523103, Validation Loss: 0.1618144296230665
Training Accuracy: 0.9490930683355959, Training F1 Score: 0.9490589821208834
Validation Accuracy: 0.9374542825916715, Validation F1 Score: 0.9373767362009776

Epoch 5, Training Loss: 0.134289531603

In [42]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 5 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 5, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.12786425165094453, Validation Loss: 0.15829317533588666
Training Accuracy: 0.9507819340847531, Training F1 Score: 0.9506746247958024
Validation Accuracy: 0.938693493283306, Validation F1 Score: 0.9385165565295784

Epoch 2, Training Loss: 0.1276408696758141, Validation Loss: 0.1581428434846313
Training Accuracy: 0.9508550824101943, Training F1 Score: 0.9507536351713198
Validation Accuracy: 0.9387451270621241, Validation F1 Score: 0.9385709520551978

Epoch 3, Training Loss: 0.12732115726626095, Validation Loss: 0.1573824483101115
Training Accuracy: 0.9513305465255621, Training F1 Score: 0.9512293792093213
Validation Accuracy: 0.9389430565475935, Validation F1 Score: 0.9387658279429677

Epoch 4, Training Loss: 0.12688898075054048, Validation Loss: 0.15602797028228543
Training Accuracy: 0.9520469698306187, Training F1 Score: 0.9519512816516009
Validation Accuracy: 0.9395540562636077, Validation F1 Score: 0.9393879345355352

Epoch 5, Training Loss: 0.12625972611936

In [43]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0005

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 5 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 5, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.5471089795353995, Validation Loss: 0.29028088985177347
Training Accuracy: 0.8848925042329214, Training F1 Score: 0.8839382038144373
Validation Accuracy: 0.8807173653003795, Validation F1 Score: 0.8797414446247134

Epoch 2, Training Loss: 0.22933137161242187, Validation Loss: 0.20954105018485564
Training Accuracy: 0.9252832884044844, Training F1 Score: 0.9251237091191479
Validation Accuracy: 0.918151854943504, Validation F1 Score: 0.917944418889338

Epoch 3, Training Loss: 0.17783364648579775, Validation Loss: 0.18467929444810044
Training Accuracy: 0.9369891718964134, Training F1 Score: 0.9368706531628563
Validation Accuracy: 0.9280999629957919, Validation F1 Score: 0.9279164487819294

Epoch 4, Training Loss: 0.15746817645092853, Validation Loss: 0.17289367829075616
Training Accuracy: 0.9430066973746205, Training F1 Score: 0.9429727279587908
Validation Accuracy: 0.933237523988193, Validation F1 Score: 0.9331618370836294

Epoch 5, Training Loss: 0.14754376587778

In [44]:
# Calls evaluate_model again with `optimizer` exactly as earlier cells left it
# (this cell changes no learning rate and does no timing).
# NOTE(review): 10 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 16)

Epoch 1, Training Loss: 0.1420469445767831, Validation Loss: 0.16379475793291157
Training Accuracy: 0.9470664294366073, Training F1 Score: 0.9469446100653659
Validation Accuracy: 0.9356815228522499, Validation F1 Score: 0.9354948313063487

Epoch 2, Training Loss: 0.1374254807310955, Validation Loss: 0.16244263718689622
Training Accuracy: 0.9478237297470574, Training F1 Score: 0.947676116671323
Validation Accuracy: 0.9362753113086582, Validation F1 Score: 0.9360504329009303

Epoch 3, Training Loss: 0.13343840026647394, Validation Loss: 0.16008564715595616
Training Accuracy: 0.949467415648148, Training F1 Score: 0.9493709220407435
Validation Accuracy: 0.9375231276300956, Validation F1 Score: 0.9373637995839212

Epoch 4, Training Loss: 0.1308913204877495, Validation Loss: 0.15804413020032218
Training Accuracy: 0.9506894229672833, Training F1 Score: 0.9506387203690648
Validation Accuracy: 0.9384869581680335, Validation F1 Score: 0.9383802786884938

Epoch 5, Training Loss: 0.128620077841047

In [45]:
# Another evaluate_model call reusing the current `model`, `criterion` and
# `optimizer` unchanged; nothing is reconfigured or timed in this cell.
# NOTE(review): 10 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 16)

Epoch 1, Training Loss: 0.11787714887081889, Validation Loss: 0.1487572648437567
Training Accuracy: 0.9555537866091233, Training F1 Score: 0.9555322846812451
Validation Accuracy: 0.9419722382382555, Validation F1 Score: 0.9419201109443353

Epoch 2, Training Loss: 0.116244681601608, Validation Loss: 0.14796671430039227
Training Accuracy: 0.956007736511126, Training F1 Score: 0.9559881340958213
Validation Accuracy: 0.942247618391952, Validation F1 Score: 0.9421971464043155

Epoch 3, Training Loss: 0.11478185112258762, Validation Loss: 0.1473187245781526
Training Accuracy: 0.9563863866663511, Training F1 Score: 0.956367202321877
Validation Accuracy: 0.9426778998821028, Validation F1 Score: 0.9426240585922683

Epoch 4, Training Loss: 0.11352190938833032, Validation Loss: 0.14653194336188227
Training Accuracy: 0.9568381851470174, Training F1 Score: 0.9568183377553612
Validation Accuracy: 0.9429446744059964, Validation F1 Score: 0.9428883096294064

Epoch 5, Training Loss: 0.11249595320158029

In [46]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0002

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.10438274481478599, Validation Loss: 0.13789183967236784
Training Accuracy: 0.9617283658448954, Training F1 Score: 0.9617088318731036
Validation Accuracy: 0.9468172078173541, Validation F1 Score: 0.9467564429382519

Epoch 2, Training Loss: 0.10207500785411677, Validation Loss: 0.13706161828342908
Training Accuracy: 0.9620575333093808, Training F1 Score: 0.9620447250449953
Validation Accuracy: 0.9470065316730205, Validation F1 Score: 0.9469585256873705

Epoch 3, Training Loss: 0.1012514452895042, Validation Loss: 0.13660896747498288
Training Accuracy: 0.9623544294538187, Training F1 Score: 0.9623528044203848
Validation Accuracy: 0.9472044611584899, Validation F1 Score: 0.9471772112805764

Epoch 4, Training Loss: 0.10063510835852954, Validation Loss: 0.13628075455353952
Training Accuracy: 0.9624985746833645, Training F1 Score: 0.9624993464718562
Validation Accuracy: 0.9475745032400196, Validation F1 Score: 0.9475515934829855

Epoch 5, Training Loss: 0.10012291269

In [47]:
# Timed evaluate_model run that reuses `optimizer` unchanged from earlier cells.
# Drain queued CUDA work before starting the clock; the guard keeps the cell
# runnable on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.09408655950850442, Validation Loss: 0.13329234114371585
Training Accuracy: 0.9644757308916135, Training F1 Score: 0.964462610451983
Validation Accuracy: 0.9488395308210631, Validation F1 Score: 0.9487945787756927

Epoch 2, Training Loss: 0.09331592123334488, Validation Loss: 0.13265810175312787
Training Accuracy: 0.9648500782041656, Training F1 Score: 0.9648325634773427
Validation Accuracy: 0.9491063053449567, Validation F1 Score: 0.9490494404039059

Epoch 3, Training Loss: 0.09254076163053716, Validation Loss: 0.1325647020476304
Training Accuracy: 0.9649189236869338, Training F1 Score: 0.9648995429491626
Validation Accuracy: 0.9490890940853506, Validation F1 Score: 0.9490304819780228

Epoch 4, Training Loss: 0.09195766459005449, Validation Loss: 0.13264460826264696
Training Accuracy: 0.964992072012375, Training F1 Score: 0.9649722544813696
Validation Accuracy: 0.9490546715661385, Validation F1 Score: 0.9489931553773624

Epoch 5, Training Loss: 0.0914937894208

In [48]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.08455128031209447, Validation Loss: 0.1281377704352258
Training Accuracy: 0.9681051786862991, Training F1 Score: 0.9680794581539828
Validation Accuracy: 0.9510683889400445, Validation F1 Score: 0.9509997139809068

Epoch 2, Training Loss: 0.08343363505337842, Validation Loss: 0.12799011852927272
Training Accuracy: 0.9682708381292101, Training F1 Score: 0.9682341672148342
Validation Accuracy: 0.9511544452380748, Validation F1 Score: 0.9510604025717083

Epoch 3, Training Loss: 0.0833648867990448, Validation Loss: 0.12691932296694558
Training Accuracy: 0.9689463844288729, Training F1 Score: 0.9689122568936411
Validation Accuracy: 0.9513954028725592, Validation F1 Score: 0.9513032785478235

Epoch 4, Training Loss: 0.08313656049765522, Validation Loss: 0.12556711872734005
Training Accuracy: 0.9697639245367452, Training F1 Score: 0.9697307508112591
Validation Accuracy: 0.9521699095548307, Validation F1 Score: 0.9520812548867525

Epoch 5, Training Loss: 0.082604627793

In [49]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.00005

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.07552349267276798, Validation Loss: 0.1197168956791098
Training Accuracy: 0.9732406214165389, Training F1 Score: 0.9732420503617814
Validation Accuracy: 0.955302358803129, Validation F1 Score: 0.9552878728476861

Epoch 2, Training Loss: 0.07503097019361654, Validation Loss: 0.11997202698226901
Training Accuracy: 0.9732191072031737, Training F1 Score: 0.9732238919881614
Validation Accuracy: 0.9551130349474626, Validation F1 Score: 0.9551015302573446

Epoch 3, Training Loss: 0.07475445128560552, Validation Loss: 0.11981555840541769
Training Accuracy: 0.9733417382193547, Training F1 Score: 0.9733463907715331
Validation Accuracy: 0.9551474574666747, Validation F1 Score: 0.9551355732769966

Epoch 4, Training Loss: 0.07456040494285261, Validation Loss: 0.11974082116782037
Training Accuracy: 0.9734019780167767, Training F1 Score: 0.9734063382744743
Validation Accuracy: 0.9552249081349019, Validation F1 Score: 0.9552137399415805

Epoch 5, Training Loss: 0.074396985795

In [50]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.00002

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.07162546105255108, Validation Loss: 0.1174174862091672
Training Accuracy: 0.9750499667605403, Training F1 Score: 0.9750510114475586
Validation Accuracy: 0.9563780625285061, Validation F1 Score: 0.9563609844685513

Epoch 2, Training Loss: 0.07130857129046957, Validation Loss: 0.11744424025951104
Training Accuracy: 0.9751446292993466, Training F1 Score: 0.975146459017872
Validation Accuracy: 0.956283400600673, Validation F1 Score: 0.9562686970037793

Epoch 3, Training Loss: 0.07118461743945978, Validation Loss: 0.1173951029039541
Training Accuracy: 0.9752134747821148, Training F1 Score: 0.9752154019454865
Validation Accuracy: 0.9563350343794911, Validation F1 Score: 0.9563211028571166

Epoch 4, Training Loss: 0.07111795450899827, Validation Loss: 0.11736570658515216
Training Accuracy: 0.9751919605687498, Training F1 Score: 0.9751938503366309
Validation Accuracy: 0.9563952737881122, Validation F1 Score: 0.9563811408774756

Epoch 5, Training Loss: 0.07105169740844

In [51]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.00001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.07035026523862674, Validation Loss: 0.11706285030852569
Training Accuracy: 0.9754027998597273, Training F1 Score: 0.9754029572731007
Validation Accuracy: 0.9566104145331876, Validation F1 Score: 0.9565938670133044

Epoch 2, Training Loss: 0.07030765793167681, Validation Loss: 0.11703670855852995
Training Accuracy: 0.975469493921159, Training F1 Score: 0.9754709421150373
Validation Accuracy: 0.9565845976437786, Validation F1 Score: 0.9565699827356388

Epoch 3, Training Loss: 0.07029032349903579, Validation Loss: 0.1170174498138099
Training Accuracy: 0.9755189766118987, Training F1 Score: 0.9755207720875843
Validation Accuracy: 0.9565501751245665, Validation F1 Score: 0.9565360604426596

Epoch 4, Training Loss: 0.0702534009682425, Validation Loss: 0.11700455772719576
Training Accuracy: 0.9755490965106097, Training F1 Score: 0.975551051371212
Validation Accuracy: 0.9565415694947634, Validation F1 Score: 0.9565283059965443

Epoch 5, Training Loss: 0.07021668500728

In [52]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.000005

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 5 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 5, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.06980081457204367, Validation Loss: 0.11680278580548555
Training Accuracy: 0.9755060680838796, Training F1 Score: 0.9754966495769104
Validation Accuracy: 0.9566706539418087, Validation F1 Score: 0.9566379236439734

Epoch 2, Training Loss: 0.06965290638954637, Validation Loss: 0.11674298313259643
Training Accuracy: 0.9755727621453113, Training F1 Score: 0.9755652985909335
Validation Accuracy: 0.9567136820908239, Validation F1 Score: 0.9566845497275421

Epoch 3, Training Loss: 0.06963831450617539, Validation Loss: 0.11674056503792135
Training Accuracy: 0.9755706107239748, Training F1 Score: 0.9755627924967688
Validation Accuracy: 0.9567222877206268, Validation F1 Score: 0.9566925626043296

Epoch 4, Training Loss: 0.06961822089154993, Validation Loss: 0.11673323757319491
Training Accuracy: 0.9755878220946669, Training F1 Score: 0.9755800967148666
Validation Accuracy: 0.9566964708312178, Validation F1 Score: 0.9566668639237061

Epoch 5, Training Loss: 0.0696002721

In [53]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.000001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 5 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 5, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.06940010032095444, Validation Loss: 0.11669928767384942
Training Accuracy: 0.9756631218414445, Training F1 Score: 0.9756562817781657
Validation Accuracy: 0.9566878652014148, Validation F1 Score: 0.9566596748726762

Epoch 2, Training Loss: 0.06936662093831844, Validation Loss: 0.11668678589864918
Training Accuracy: 0.9756588189987715, Training F1 Score: 0.9756519781471047
Validation Accuracy: 0.9566018089033846, Validation F1 Score: 0.9565736300383773

Epoch 3, Training Loss: 0.06936168336683038, Validation Loss: 0.1166864332975485
Training Accuracy: 0.975669576105454, Training F1 Score: 0.975662657453177
Validation Accuracy: 0.9565673863841725, Validation F1 Score: 0.9565389470044268

Epoch 4, Training Loss: 0.06935726788623922, Validation Loss: 0.11668566071036836
Training Accuracy: 0.975660970420108, Training F1 Score: 0.9756540682792527
Validation Accuracy: 0.9565415694947634, Validation F1 Score: 0.9565131642815181

Epoch 5, Training Loss: 0.06935356863348

In [54]:
# Set the learning rate on the existing optimizer in place (the expression
# evaluates to roughly 1e-5); the optimizer object is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.000001 * 10

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 5 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 5, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.06972777877932808, Validation Loss: 0.11686190500457024
Training Accuracy: 0.9756889388974826, Training F1 Score: 0.9756917234113511
Validation Accuracy: 0.9565932032735815, Validation F1 Score: 0.956582053160514

Epoch 2, Training Loss: 0.06985445251794688, Validation Loss: 0.1169492336232257
Training Accuracy: 0.9756351533640699, Training F1 Score: 0.9756391611620888
Validation Accuracy: 0.9565329638649605, Validation F1 Score: 0.9565232216454832

Epoch 3, Training Loss: 0.06982581798916436, Validation Loss: 0.11694324883201232
Training Accuracy: 0.9756545161560986, Training F1 Score: 0.9756585661433242
Validation Accuracy: 0.9564813300861423, Validation F1 Score: 0.9564711404340759

Epoch 4, Training Loss: 0.06979018771888866, Validation Loss: 0.11692988638058845
Training Accuracy: 0.975669576105454, Training F1 Score: 0.975673639918884
Validation Accuracy: 0.9564899357159453, Validation F1 Score: 0.9564796249462926

Epoch 5, Training Loss: 0.06975695590691

In [55]:
# Dump every registered parameter of the model: its qualified name on one
# line, followed by the parameter's repr.
for name, param in model.named_parameters():
    print(name, param, sep="\n")

first_layer.linear.weight
Parameter containing:
tensor([[ 1.0582, -0.0018,  0.0028,  ..., -0.0036, -0.0012, -0.0028],
        [-1.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0172,  1.0564, -0.0185,  ..., -0.0118, -0.0121, -0.0128],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -1.0000,  0.0000],
        [ 0.0315, -0.0280, -0.0218,  ..., -0.0173, -0.0192,  1.0129],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -1.0000]],
       device='cuda:0', requires_grad=True)
first_layer.linear.bias
Parameter containing:
tensor([-0.0009,  0.0000, -0.0090,  0.0000, -0.0052,  0.0000, -0.0073,  0.0000,
        -0.0069,  0.0000, -0.0085,  0.0000, -0.0075,  0.0000, -0.0063,  0.0000,
        -0.0054,  0.0000, -0.0100,  0.0000, -0.0088,  0.0000, -0.0117,  0.0000,
        -0.0076,  0.0000, -0.0022,  0.0000, -0.0108,  0.0000, -0.0066,  0.0000,
        -0.0116,  0.0000, -0.0079,  0.0000, -0.0129,  0.0000, -0.0082,  0.0000,
        -0.0193,  0.0000, -0

In [34]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 1.307600110554671, Validation Loss: 1.1856549278817368
Training Accuracy: 0.48759813170571137, Training F1 Score: 0.31964538402624754
Validation Accuracy: 0.48760359026875383, Validation F1 Score: 0.3196513678755306

Epoch 2, Training Loss: 0.9911411693507082, Validation Loss: 0.7952721405237344
Training Accuracy: 0.6690683700186528, Training F1 Score: 0.6359367341083367
Validation Accuracy: 0.6669277041040248, Validation F1 Score: 0.6338367587537906

Epoch 3, Training Loss: 0.7557137665118112, Validation Loss: 0.7275613861462613
Training Accuracy: 0.694726220877823, Training F1 Score: 0.6756646906635713
Validation Accuracy: 0.692649931585243, Validation F1 Score: 0.6735216103720431

Epoch 4, Training Loss: 0.7035255324110403, Validation Loss: 0.7264274120680633
Training Accuracy: 0.694420719048039, Training F1 Score: 0.665369836713696
Validation Accuracy: 0.6925380583978038, Validation F1 Score: 0.6634341709434263

Epoch 5, Training Loss: 0.6919250369417937, Va

In [29]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 64 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 64)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 1.8057236788097735, Validation Loss: 1.205128083886402
Training Accuracy: 0.5748275097943456, Training F1 Score: 0.532677902978648
Validation Accuracy: 0.5737803671161674, Validation F1 Score: 0.5316521221622602

Epoch 2, Training Loss: 1.0996317938779163, Validation Loss: 0.8743102936598277
Training Accuracy: 0.5977143299720961, Training F1 Score: 0.5214867473120899
Validation Accuracy: 0.5950620896190287, Validation F1 Score: 0.5181048741384898

Epoch 3, Training Loss: 0.7885691515017922, Validation Loss: 0.6949502639413176
Training Accuracy: 0.6891949166216661, Training F1 Score: 0.6746762224566707
Validation Accuracy: 0.6881663984578711, Validation F1 Score: 0.673683985072721

Epoch 4, Training Loss: 0.7210023260143712, Validation Loss: 0.7061048918152707
Training Accuracy: 0.7011890905726869, Training F1 Score: 0.6712697614928861
Validation Accuracy: 0.6990611257884908, Validation F1 Score: 0.669062541191424

Epoch 5, Training Loss: 0.6800031598499585, Vali

In [30]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 64 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 64)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

Epoch 1, Training Loss: 0.6482883245601477, Validation Loss: 0.65033711338891
Training Accuracy: 0.7215953219494459, Training F1 Score: 0.7052375619531768
Validation Accuracy: 0.7193704121236113, Validation F1 Score: 0.702937030999159

Epoch 2, Training Loss: 0.6444209904029583, Validation Loss: 0.644916022551123
Training Accuracy: 0.7231981308451428, Training F1 Score: 0.7093322124349345
Validation Accuracy: 0.7218402278770771, Validation F1 Score: 0.707716455939064

Epoch 3, Training Loss: 0.6415352971006409, Validation Loss: 0.6437382442899314
Training Accuracy: 0.7228151778472448, Training F1 Score: 0.7109572041503743
Validation Accuracy: 0.7216767209108199, Validation F1 Score: 0.709661205516383

Epoch 4, Training Loss: 0.6395893395138763, Validation Loss: 0.6416136971650775
Training Accuracy: 0.7237101691232313, Training F1 Score: 0.7118557533517296
Validation Accuracy: 0.722279114997031, Validation F1 Score: 0.7102082487662996

Epoch 5, Training Loss: 0.6377461002811073, Validat

In [29]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 8 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 8)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

1.9459153413772583
1.7211235761642456
1.8293129205703735
1.6529580354690552
1.875167727470398
1.629576325416565
1.639758586883545
1.4842135906219482
1.396589756011963
1.3132436275482178
1.307774305343628
1.5233104228973389
1.513757348060608
1.5241475105285645
1.449304461479187
1.3725199699401855
1.195020079612732
1.147672176361084
1.5728925466537476
1.0980587005615234
1.1427743434906006
1.1603469848632812
1.219657301902771
1.184799313545227
1.119278907775879
1.1146836280822754
1.167554497718811
1.1407761573791504
1.0683541297912598
1.019402265548706
1.0105711221694946
1.0071899890899658
0.9998883008956909
1.0079543590545654
0.9901393055915833
0.9908040165901184
0.995794951915741
0.979019284248352
0.9791274666786194
0.9843977093696594
0.9760539531707764
0.9427629709243774
0.9618619680404663
0.9578478336334229
0.9528827667236328
0.9406067132949829
0.9433158040046692
0.9316280484199524
0.936485230922699
0.9138314127922058
0.9193710088729858
0.9140663146972656
0.9162853360176086
0.91325813

KeyboardInterrupt: 

In [20]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 10 and 1024 * 8 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 10, 1024 * 8)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

1.9459153413772583
1.5398389101028442
1.320652961730957
1.2236943244934082
1.2085390090942383
1.2440117597579956
1.2571783065795898
1.2673126459121704
1.2284069061279297
1.2386085987091064
1.2222321033477783
1.2211248874664307
1.1789835691452026
1.1707390546798706
1.1351127624511719
1.1525695323944092
1.1121410131454468
1.1236003637313843
1.1125309467315674
1.0914899110794067
1.0991089344024658
1.0779876708984375
1.078782558441162
1.0558438301086426
1.047053575515747
1.055101990699768
1.0477261543273926
1.0337426662445068
1.0097846984863281
1.0111554861068726
0.9982945919036865
0.9946751594543457
0.9888061881065369
0.9877339005470276
0.9601951241493225
0.9653480052947998
0.9570215940475464
0.9451500773429871
0.9471073746681213
0.9473270773887634
0.937891960144043
0.9114409685134888
0.9271887540817261
0.9155710339546204
0.9237993359565735
0.9161682724952698
0.9090586304664612
0.9052014946937561
0.9101273417472839
0.8804532885551453
0.8974512815475464
0.8741264343261719
0.879237294197082

In [37]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

1.945919156074524
1.2594339847564697
1.2584514617919922
1.3378372192382812
1.3107519149780273
1.2927818298339844
1.2241499423980713
1.1768137216567993
1.1734135150909424
1.1348594427108765
1.0834970474243164
1.041297435760498
1.0289627313613892
1.0330392122268677
1.0062477588653564
0.9907570481300354
0.9803515672683716
0.9421758651733398
0.910335123538971
0.8918401002883911
0.8682236075401306
0.8836233615875244
0.8830834627151489
0.8678690195083618
0.8456855416297913
0.8398438096046448
0.827994167804718
0.8162038922309875
0.8145592212677002
Epoch 1, Training Loss: 1.0629857906186324, Validation Loss: 0.8064155761279874
Training Accuracy: 0.6648752498338028, Training F1 Score: 0.6461636927342742
Validation Accuracy: 0.6639071280431659, Validation F1 Score: 0.6451132605293023

0.7956252694129944
0.7927773594856262
0.7903684377670288
0.7921122908592224
0.7751148343086243
0.7722638845443726
0.7749717831611633
0.756729245185852
0.7641393542289734
0.7576382756233215
0.7537832856178284
0.7531

KeyboardInterrupt: 

In [31]:
# List each registered model parameter: the qualified name first, then the
# parameter's repr on the following line(s).
for name, param in model.named_parameters():
    print(name, param, sep="\n")

copy_tensor
Parameter containing:
tensor([[[[ 0.8277,  0.4410,  0.3069,  ...,  0.5385, -0.9969,  1.3867],
          [-1.1852,  0.8048, -0.3854,  ...,  0.0073, -0.0906, -0.4299],
          [-0.5277,  1.8312,  2.6152,  ..., -0.6186,  0.3207, -0.1636],
          ...,
          [-1.4442, -0.9133, -0.1195,  ..., -1.0740,  0.2558, -1.7473],
          [-0.8845, -0.4335,  0.7672,  ...,  0.0748, -1.8827, -1.0929],
          [-1.0942, -1.1413,  0.8970,  ...,  0.5688,  0.1352, -0.1426]],

         [[-0.3369,  0.1095,  0.8045,  ...,  0.7055, -0.9463,  1.8010],
          [ 0.4168,  0.8493, -0.1739,  ...,  0.9907, -0.4204, -0.9187],
          [-0.3940,  0.1856,  0.5866,  ..., -0.0302,  1.5826,  0.6268],
          ...,
          [ 0.7070,  0.5277, -0.2452,  ...,  1.1998, -0.1681,  0.7360],
          [ 0.6341,  1.0665, -0.0837,  ..., -1.4147, -0.1684,  2.0537],
          [-1.1398,  0.4702, -0.2144,  ...,  1.6041,  1.3350, -1.1957]],

         [[-0.0395, -0.2068, -0.6581,  ..., -1.0872,  1.6298,  0.454

In [None]:
# Wrap cross-entropy in the notebook's CustomLoss.
# NOTE(review): first_order_weight / second_order_weight presumably scale extra
# regularisation terms, with 0.0 switching the second-order one off — confirm
# against the CustomLoss definition.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.0001, second_order_weight=0.0)
# A new optimizer object starts without previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Wrap cross-entropy in the notebook's CustomLoss.
# NOTE(review): first_order_weight / second_order_weight presumably scale extra
# regularisation terms, with 0.0 switching the second-order one off — confirm
# against the CustomLoss definition.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.001, second_order_weight=0.0)
# A new optimizer object starts without previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Wrap cross-entropy in the notebook's CustomLoss.
# NOTE(review): first_order_weight / second_order_weight presumably scale extra
# regularisation terms, with 0.0 switching the second-order one off — confirm
# against the CustomLoss definition.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.001, second_order_weight=0.0)
# A new optimizer object starts without previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Wrap cross-entropy in the notebook's CustomLoss.
# NOTE(review): first_order_weight / second_order_weight presumably scale extra
# regularisation terms, with 0.0 switching the second-order one off — confirm
# against the CustomLoss definition.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.0001, second_order_weight=0.0)
# A new optimizer object starts without previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Wrap cross-entropy in the notebook's CustomLoss.
# NOTE(review): first_order_weight / second_order_weight presumably scale extra
# regularisation terms, with 0.0 switching the second-order one off — confirm
# against the CustomLoss definition.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.00001, second_order_weight=0.0)
# A new optimizer object starts without previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Set the learning rate on the existing optimizer in place; the optimizer object
# (and any state it holds) is reused rather than rebuilt.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.001

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 20 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 20, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Untimed evaluate_model call reusing the current `criterion` and `optimizer`.
# The last argument here is 1024, whereas most other cells in this notebook
# pass 1024 * 16.
# NOTE(review): 100 and 1024 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024)

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain queued CUDA work before starting the clock. Guarded so the cell also
# runs on a CPU-only machine, where torch.cuda.synchronize() raises.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 1 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam optimizer using the fused implementation.
# NOTE(review): fused=True has historically required the parameters to live on
# a CUDA device — confirm it is supported for the installed torch version and
# the device the model is on.
optimizer = optim.Adam(model.parameters(), lr=0.001, fused=True)

# Drain queued CUDA work before starting the clock. Guarded so that this call
# itself cannot fail when CUDA is unavailable.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 1 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 32 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 32)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Untimed run with a freshly constructed Adam optimizer (lr=0.001); building a
# new optimizer discards whatever state the previous `optimizer` object held.
# NOTE(review): 100 and 1024 * 32 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
optimizer = optim.Adam(model.parameters(), lr=0.001)
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 32)

In [None]:
# Untimed run with a freshly constructed Adam optimizer (lr=0.0001); building a
# new optimizer discards whatever state the previous `optimizer` object held.
# NOTE(review): 10000 and 1024 * 32 are presumably the epoch count and batch
# size, which would make this a very long run — confirm against evaluate_model.
optimizer = optim.Adam(model.parameters(), lr=0.0001)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, 1024 * 32)

In [None]:
# Untimed run with a freshly constructed Adam optimizer (lr=0.001); building a
# new optimizer discards whatever state the previous `optimizer` object held.
# NOTE(review): 10000 and 1024 * 32 are presumably the epoch count and batch
# size, which would make this a very long run — confirm against evaluate_model.
optimizer = optim.Adam(model.parameters(), lr=0.001)
evaluate_model(model, custom_train_loader, criterion, optimizer, 10000, 1024 * 32)

In [None]:
# Build a plain SGD optimizer (no momentum argument is passed) over the
# model's parameters.
optimizer = optim.SGD(model.parameters(), lr=0.05)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 1 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 1 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 1, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Build a fresh Adam optimizer over the model's parameters; a new optimizer
# object starts without any previously accumulated optimizer state.
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 16 are presumably the epoch count and batch size —
# confirm against evaluate_model's signature.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 16)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# L-BFGS is the only optimizer used by this run.
# NOTE(review): torch.optim.LBFGS.step() must be given a closure that
# re-evaluates the loss — confirm evaluate_model supplies one.
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.01)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 1000 are presumably the epoch count and batch
# size (large enough to act as a near-full batch) — confirm against evaluate_model.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 1000)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# L-BFGS is the only optimizer used by this run.
# NOTE(review): torch.optim.LBFGS.step() must be given a closure that
# re-evaluates the loss — confirm evaluate_model supplies one.
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.01)

# Drain queued CUDA work before starting the clock so the timing covers only
# this run. Guarded so the cell still works on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

# NOTE(review): 100 and 1024 * 1000 are presumably the epoch count and batch
# size (large enough to act as a near-full batch) — confirm against evaluate_model.
evaluate_model(model, custom_train_loader, criterion, optimizer, 100, 1024 * 1000)

# Include any GPU work still in flight in the measured interval.
if torch.cuda.is_available():
    torch.cuda.synchronize()
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Profile a single evaluate_model call with the autograd profiler; the result
# is left in `prof` for later cells to summarise.
# Drain queued CUDA work first so earlier cells do not leak into the profile.
# Guarded so the cell also runs on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.synchronize()

# Only request device-event timing when a CUDA device actually exists.
with profiler.profile(with_stack=True, use_device='cuda' if torch.cuda.is_available() else None) as prof:
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    start_time = time.time()

    # NOTE(review): 1 and 1024 * 1000 are presumably the epoch count and batch
    # size — confirm against evaluate_model's signature.
    evaluate_model(model, custom_train_loader, criterion, optimizer, 1, 1024 * 1000)

    # Wait for in-flight GPU work so it is inside both the timing and the profile.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    elapsed_time = time.time() - start_time
    print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Summarise the profile held in `prof` (produced by an earlier cell): events
# are averaged by key, sorted by "self_cuda_time_total" and limited to 20 rows,
# then printed as a text table.
prof_averages = prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=20)
print(prof_averages)

In [None]:
# Same summary of `prof` as a text table, this time sorted by
# "self_cpu_time_total" and limited to 20 rows. Overwrites `prof_averages`.
prof_averages = prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=20)
print(prof_averages)

In [None]:
# Inspect the model's `copy_tensor` parameter: print one slice along the second
# axis together with the full and sliced shapes.
print(model.copy_tensor[0:, 0])
print(model.copy_tensor.shape)
print(model.copy_tensor[0:, 0].shape)

# Bring the [0, 0] slice to host memory as a NumPy array for plotting.
# NOTE(review): plot_surface needs a 2-D Z, so this assumes copy_tensor is 4-D — confirm.
tensor_data = model.copy_tensor[0, 0, :].detach().cpu().numpy()

# Unit-square grid with one node per element of the slice.
x = np.linspace(0, 1, tensor_data.shape[1])
y = np.linspace(0, 1, tensor_data.shape[0])
X, Y = np.meshgrid(x, y)

# The slice values are used directly as surface heights.
Z = tensor_data

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

# Surface through the grid points, coloured by height.
surf = ax.plot_surface(X, Y, Z, cmap='viridis', edgecolor='none')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

ax.set_title('3D Surface Plot of Tensor Data')
fig.colorbar(surf, ax=ax, label='Z values')

plt.show()


In [None]:
# Draw one 3D scatter plot per leading-index entry of model.copy_tensor.
# `np` and `plt` come from the notebook's import cell, so they are not
# re-imported here.
NUM_PLOTS = 45  # upper bound on the number of figures produced

# Clamp to the tensor's first dimension so the loop never indexes past the end.
num_slices = min(NUM_PLOTS, model.copy_tensor.shape[0])

for i in range(num_slices):
    # Entry [i, 0] of copy_tensor as a NumPy array on the host; shape[0] and
    # shape[1] are read below, i.e. the entry is treated as 2D.
    tensor_data = model.copy_tensor[i, 0, :].cpu().detach().numpy()

    # Grid over [0, 1] x [0, 1] matching the entry's columns (x) and rows (y).
    x = np.linspace(0, 1, tensor_data.shape[1])
    y = np.linspace(0, 1, tensor_data.shape[0])
    X, Y = np.meshgrid(x, y)

    # scatter() takes 1D coordinate arrays, so flatten grid and values alike.
    X_flat = X.flatten()
    Y_flat = Y.flatten()
    Z_flat = tensor_data.flatten()

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')

    # Points are coloured by their own value.
    sc = ax.scatter(X_flat, Y_flat, Z_flat, c=Z_flat, cmap='viridis')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(f'3D Scatter Plot of Tensor Data - Plot {i+1}')

    fig.colorbar(sc, label='Z values')

    plt.show()
    # Release the figure explicitly; otherwise every iteration leaves another
    # open figure behind and memory grows with the number of plots.
    plt.close(fig)


In [None]:
# Fit a scikit-learn LogisticRegression on the train/validation tensors held
# by custom_train_loader and report accuracy and log loss on both splits.
X_train = custom_train_loader.train_data_tensor.cpu().numpy()
y_train = custom_train_loader.train_labels_tensor.cpu().numpy()
X_val = custom_train_loader.val_data_tensor.cpu().numpy()
y_val = custom_train_loader.val_labels_tensor.cpu().numpy()

# Other configurations that appeared in this cell but were never in effect
# (one sat inside a string literal, the other was overwritten immediately):
#   LogisticRegression(penalty='l2', C=55.0)
#   LogisticRegression(solver='lbfgs', max_iter=1000, penalty=None)
log_reg = LogisticRegression()

log_reg.fit(X_train, y_train)

y_train_pred = log_reg.predict(X_train)
y_val_pred = log_reg.predict(X_val)

train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)

# predict_proba columns follow log_reg.classes_. Passing that ordering to
# log_loss keeps the score well-defined even if a split lacks some class.
train_proba = log_reg.predict_proba(X_train)
val_proba = log_reg.predict_proba(X_val)

train_log_loss = log_loss(y_train, train_proba, labels=log_reg.classes_)
val_log_loss = log_loss(y_val, val_proba, labels=log_reg.classes_)

print(f'Training Accuracy: {train_accuracy}')
print(f'Training Log Loss: {train_log_loss}')
print()
print(f'Validation Accuracy: {val_accuracy}')
print(f'Validation Log Loss: {val_log_loss}')