In [7]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import logit
from scipy.stats import norm

import tensorflow as tf
from keras import layers, models, datasets

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchinfo import summary
from torch.optim.lr_scheduler import StepLR
import torch.autograd.profiler as profiler

from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression

import sys
import time

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

An interpolation-based learning technique, driven by explicit regularization.

In [8]:
def calculate_metrics(model, data_tensor, labels_tensor, batch_size=1024):
    """Compute accuracy and weighted F1 of ``model`` over a labelled tensor.

    The data is pushed through the model in contiguous mini-batches with
    gradients disabled; the predicted class of a row is the index of the
    largest model output along dimension 1.

    Args:
        model: Module called as ``model(inputs)``. It is switched to eval mode
            and is still in eval mode when this function returns.
        data_tensor: Samples indexed along dimension 0. Every mini-batch is
            flattened to ``(rows, features)`` before it is given to the model,
            so the feature count no longer has to be 54.
        labels_tensor: Class indices aligned with ``data_tensor``.
        batch_size: Number of rows evaluated per forward pass.

    Returns:
        Tuple ``(accuracy, f1)``; ``f1`` is computed with ``average='weighted'``.
    """
    model.eval()
    pred_batches = []
    label_batches = []

    with torch.no_grad():
        for start_idx in range(0, len(data_tensor), batch_size):
            # Slicing past the end is clipped, so no explicit min() is needed.
            batch = data_tensor[start_idx:start_idx + batch_size]
            inputs = batch.reshape(batch.shape[0], -1)
            labels = labels_tensor[start_idx:start_idx + batch_size]

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # Keep whole batches and concatenate once at the end instead of
            # extending Python lists one scalar at a time.
            pred_batches.append(preds.cpu())
            label_batches.append(labels.cpu())

    if pred_batches:
        all_preds = torch.cat(pred_batches).numpy()
        all_labels = torch.cat(label_batches).numpy()
    else:
        # No rows at all: hand empty sequences to scikit-learn, as before.
        all_preds, all_labels = [], []

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    return accuracy, f1

In [9]:
class CustomDataLoader:
    """Hold a stratified train/validation split as tensors on ``device``.

    Despite the name this is not a ``torch.utils.data.DataLoader``: it only
    stores four tensors (``train_data_tensor``, ``train_labels_tensor``,
    ``val_data_tensor``, ``val_labels_tensor``) that callers slice themselves.

    Args:
        features: Array-like of samples, one row per sample.
        labels: Array-like of class ids; also used to stratify the split.
        validation_size: Value forwarded as ``test_size`` to
            ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``; the default of 42
            reproduces the previously hard-coded split.
    """

    def __init__(self, features, labels, validation_size=0.2, random_state=42):
        train_data, val_data, train_labels, val_labels = train_test_split(
            features, labels, test_size=validation_size, stratify=labels, random_state=random_state
        )

        # np.asarray makes pandas objects returned by the split convert the
        # same way as plain NumPy arrays do.
        self.train_data_tensor = torch.tensor(np.asarray(train_data)).float().to(device)
        self.train_labels_tensor = torch.tensor(np.asarray(train_labels)).long().to(device)

        self.val_data_tensor = torch.tensor(np.asarray(val_data)).float().to(device)
        self.val_labels_tensor = torch.tensor(np.asarray(val_labels)).long().to(device)

In [10]:
def evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs, batch_size=1024):
    """Train ``model`` and print train/validation metrics after every epoch.

    Despite its name this function runs the optimisation loop. Batches are
    contiguous slices of the stored tensors and are visited in the same order
    every epoch.

    Args:
        model: Module with a ``copy_tensor`` attribute, which is passed to
            ``criterion`` as its third argument.
        custom_train_loader: Object exposing ``train_data_tensor``,
            ``train_labels_tensor``, ``val_data_tensor`` and
            ``val_labels_tensor``.
        criterion: Callable ``criterion(outputs, labels, tensor)`` returning the
            training loss.
        optimizer: Any torch optimizer. ``torch.optim.LBFGS`` is driven through
            a closure; every other optimizer uses the usual
            zero_grad / backward / step sequence.
        num_epochs: Number of passes over the training tensors.
        batch_size: Rows per optimisation step.

    Returns:
        None. Results are printed. The reported training loss is the value of
        ``criterion`` (so it includes any penalty term it adds), whereas the
        validation loss is plain cross-entropy.
    """
    unregularized_criterion = nn.CrossEntropyLoss()

    train_data = custom_train_loader.train_data_tensor
    train_targets = custom_train_loader.train_labels_tensor
    val_data = custom_train_loader.val_data_tensor
    val_targets = custom_train_loader.val_labels_tensor
    uses_closure = isinstance(optimizer, torch.optim.LBFGS)

    for epoch in range(num_epochs):
        running_loss = 0.0

        model.train()
        for start_idx in range(0, len(train_data), batch_size):
            batch = train_data[start_idx:start_idx + batch_size]
            # Flatten per row rather than hard-coding the feature count.
            inputs = batch.reshape(batch.shape[0], -1)
            labels = train_targets[start_idx:start_idx + batch_size]

            if uses_closure:
                def closure():
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels, model.copy_tensor)
                    loss.backward()
                    return loss

                # step() returns the loss of its first closure evaluation, i.e.
                # the pre-update loss, exactly what the other branch reports.
                # Calling closure() again would cost one more forward and
                # backward pass over the batch.
                loss = optimizer.step(closure)
                running_loss += loss.item() * len(labels)
            else:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels, model.copy_tensor)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * len(labels)
                print(loss.item())

        avg_train_loss = running_loss / len(train_data)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for start_idx in range(0, len(val_data), batch_size):
                val_batch = val_data[start_idx:start_idx + batch_size]
                val_inputs = val_batch.reshape(val_batch.shape[0], -1)
                val_labels = val_targets[start_idx:start_idx + batch_size]

                val_outputs = model(val_inputs)
                val_loss += unregularized_criterion(val_outputs, val_labels).item() * len(val_labels)

        avg_val_loss = val_loss / len(val_data)

        train_accuracy, train_f1 = calculate_metrics(model, train_data, train_targets)
        val_accuracy, val_f1 = calculate_metrics(model, val_data, val_targets)

        print(f'Epoch {epoch + 1}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')
        print(f'Training Accuracy: {train_accuracy}, Training F1 Score: {train_f1}')
        print(f'Validation Accuracy: {val_accuracy}, Validation F1 Score: {val_f1}')
        print()

In [23]:
class CustomLoss(nn.Module):
    """Base loss plus squared finite-difference penalties on a parameter tensor.

    The penalties are taken along dimension 2 of ``optimized_tensor`` only:
    the first-order term sums the squared differences of neighbouring slices,
    the second-order term sums the squared differences of those differences.
    NOTE(review): dimension 3 is not penalised - confirm that smoothing a
    single grid axis is intended.

    Args:
        base_loss_fn: Callable ``base_loss_fn(outputs, labels)``.
        first_order_weight: Multiplier of the first-order penalty.
        second_order_weight: Multiplier of the second-order penalty.
    """

    def __init__(self, base_loss_fn, first_order_weight=0.0, second_order_weight=0.0):
        super(CustomLoss, self).__init__()
        self.first_order_weight = first_order_weight
        self.second_order_weight = second_order_weight
        self.base_loss_fn = base_loss_fn

    def forward(self, outputs, labels, optimized_tensor):
        """Return ``base_loss + w1 * sum(d1 ** 2) + w2 * sum(d2 ** 2)``.

        A term whose weight is zero is skipped entirely: it contributes nothing,
        and evaluating it would both waste a pass over the whole tensor and
        turn the loss into NaN if the tensor holds a non-finite entry.
        """
        total_loss = self.base_loss_fn(outputs, labels)

        use_first = self.first_order_weight != 0
        use_second = self.second_order_weight != 0
        if not (use_first or use_second):
            return total_loss

        first_order_x = optimized_tensor[:, :, :-1, :] - optimized_tensor[:, :, 1:, :]
        if use_first:
            total_loss = total_loss + self.first_order_weight * torch.sum(first_order_x ** 2)

        if use_second:
            second_order_x = first_order_x[:, :, 1:, :] - first_order_x[:, :, :-1, :]
            total_loss = total_loss + self.second_order_weight * torch.sum(second_order_x ** 2)

        return total_loss

In [12]:
# Load the Forest Cover Type table; every column except the target is a feature.
data = pd.read_csv('/kaggle/input/forest-cover-type-dataset/covtype.csv')

X = data.drop(["Cover_Type"], axis=1)
y = data["Cover_Type"]

# One-hot encode whatever non-numeric columns exist.
X = pd.get_dummies(X, drop_first=True)

# Log-transform only the columns whose values are all strictly positive, so
# np.log never sees a zero or a negative number.
for col in X.columns:
    if (X[col] > 0).all():
        X[col] = np.log(X[col])

print(X.shape, y.shape)
print(X.columns)

(581012, 54) (581012,)
Index(['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area1',
       'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4',
       'Soil_Type1', 'Soil_Type2', 'Soil_Type3', 'Soil_Type4', 'Soil_Type5',
       'Soil_Type6', 'Soil_Type7', 'Soil_Type8', 'Soil_Type9', 'Soil_Type10',
       'Soil_Type11', 'Soil_Type12', 'Soil_Type13', 'Soil_Type14',
       'Soil_Type15', 'Soil_Type16', 'Soil_Type17', 'Soil_Type18',
       'Soil_Type19', 'Soil_Type20', 'Soil_Type21', 'Soil_Type22',
       'Soil_Type23', 'Soil_Type24', 'Soil_Type25', 'Soil_Type26',
       'Soil_Type27', 'Soil_Type28', 'Soil_Type29', 'Soil_Type30',
       'Soil_Type31', 'Soil_Type32', 'Soil_Type33', 'Soil_Type34',
       'Soil_Type35', 'Soil_Type36', 'Soil_Type37', 'Soil_Type38',
       'Soi

In [15]:
# Standardise every feature, then squash it through the logistic sigmoid so the
# values handed to the model lie between 0 and 1.
# NOTE(review): the scaler is fitted on all rows, i.e. before CustomDataLoader
# performs the train/validation split - confirm this leakage is acceptable.
x_scaler = StandardScaler()
x_scaler.fit(X)
x_scaled = 1.0 / (1.0 + np.exp(-x_scaler.transform(X)))

# Encode the class labels as integer ids.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_encoded = label_encoder.transform(y)

In [16]:
class CustomLinearLayer(nn.Module):
    """Linear layer initialised to the identity whose diagonal weights stay fixed.

    The wrapped ``nn.Linear`` weight starts as an identity matrix (rectangular
    when the sizes differ). A gradient hook zeroes the gradient of the
    diagonal entries, so gradient-based updates only move the off-diagonal
    weights. The bias, when enabled, is not masked.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        bias: Whether the wrapped ``nn.Linear`` has a bias term.
    """

    def __init__(self, input_size, output_size, bias=False):
        super(CustomLinearLayer, self).__init__()

        self.linear = nn.Linear(input_size, output_size, bias=bias)

        # nn.Linear stores its weight as (output_size, input_size); building the
        # identity in that orientation also works for non-square layers.
        identity_matrix = torch.eye(output_size, input_size)
        with torch.no_grad():
            self.linear.weight.copy_(identity_matrix)

        # A non-persistent buffer follows .to()/.cuda() together with the
        # weight while leaving the state_dict keys unchanged.
        self.register_buffer("freeze_mask", identity_matrix.bool(), persistent=False)
        self.linear.weight.register_hook(self.custom_backward_hook)

    def custom_backward_hook(self, grad):
        """Return a copy of ``grad`` with the frozen (diagonal) entries zeroed."""
        # Hooks must not modify their argument in place, so build a new tensor.
        return grad.masked_fill(self.freeze_mask, 0)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x``."""
        return self.linear(x)

In [17]:
class TestClass(torch.nn.Module):
    """Sum of piecewise-linear surfaces, one per (feature pair, class).

    ``copy_tensor`` has shape ``(num_pairs, num_classes, control_points + 2,
    control_points + 2)`` and stores, for every unordered feature pair and
    every class, the surface values on a regular grid. ``forward`` scales the
    inputs by ``control_points``, locates the grid cell of each feature pair,
    interpolates linearly inside the cell's lower or upper triangle, and sums
    the results over all pairs.

    Args:
        control_points: Scale applied to the inputs; the grid has
            ``control_points + 2`` nodes per axis.
        num_features: Number of input features.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, control_points, num_features, num_classes):
        super(TestClass, self).__init__()
        self.control_points = control_points
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_pairs = num_features * (num_features - 1) // 2

        grid_size = self.control_points + 2
        self.copy_tensor = nn.Parameter(torch.zeros(self.num_pairs, self.num_classes, grid_size, grid_size))

        # Index helpers are non-persistent buffers so they follow the module
        # through .to()/.cuda() instead of being pinned to a global device,
        # and the state_dict keys stay unchanged.
        i_indices, j_indices = torch.triu_indices(num_features, num_features, offset=1)
        self.register_buffer("i_indices", i_indices, persistent=False)
        self.register_buffer("j_indices", j_indices, persistent=False)
        self.register_buffer("feature_idx", torch.arange(self.num_pairs).view(1, -1, 1, 1), persistent=False)
        self.register_buffer("class_idx", torch.arange(self.num_classes).view(1, 1, -1, 1), persistent=False)
        # NOTE(review): fc1 is constructed but never used in forward(); it is
        # kept so the parameter set and state_dict do not change.
        self.fc1 = CustomLinearLayer(num_classes, num_classes, bias=True)

    def _corner_values(self, idx_i, idx_j):
        """Gather grid values at ``(idx_i, idx_j)`` for every pair and class.

        ``idx_i`` and ``idx_j`` have shape ``(batch, num_pairs)``; the result
        has shape ``(batch, num_pairs, num_classes)``.
        """
        # The four index tensors broadcast to (batch, pairs, classes, 1).
        gathered = self.copy_tensor[
            self.feature_idx,
            self.class_idx,
            idx_i.unsqueeze(-1).unsqueeze(-1),
            idx_j.unsqueeze(-1).unsqueeze(-1),
        ]
        return gathered.squeeze(-1)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, num_features)`` to ``(batch, num_classes)``.

        Inputs are expected to lie in ``[0, 1]``. Values outside that range are
        extrapolated linearly from the nearest border cell instead of wrapping
        around (negative indices) or indexing past the grid.
        """
        x = x * self.control_points

        x_i = x[:, self.i_indices]
        x_j = x[:, self.j_indices]

        # Lower-left node of the enclosing cell. The clamp is a no-op for
        # inputs in [0, 1] and keeps lower + 1 inside the grid otherwise.
        lower_i = torch.floor(x_i).long().clamp(0, self.control_points)
        lower_j = torch.floor(x_j).long().clamp(0, self.control_points)
        upper_i = lower_i + 1
        upper_j = lower_j + 1

        value_0_0 = self._corner_values(lower_i, lower_j)
        value_0_1 = self._corner_values(lower_i, upper_j)
        value_1_0 = self._corner_values(upper_i, lower_j)
        value_1_1 = self._corner_values(upper_i, upper_j)

        # Offsets from the lower node and (non-positive) offsets from the upper node.
        lower_weight_i = (x_i - lower_i.float()).unsqueeze(-1)
        lower_weight_j = (x_j - lower_j.float()).unsqueeze(-1)
        upper_weight_i = (x_i - upper_i.float()).unsqueeze(-1)
        upper_weight_j = (x_j - upper_j.float()).unsqueeze(-1)

        # Plane through (0,0), (1,0), (0,1) and plane through (1,1), (0,1), (1,0).
        lower_triangle = value_0_0 + (value_1_0 - value_0_0) * lower_weight_i + (value_0_1 - value_0_0) * lower_weight_j
        upper_triangle = value_1_1 + (value_1_1 - value_0_1) * upper_weight_i + (value_1_1 - value_1_0) * upper_weight_j

        # The cell diagonal offset_i + offset_j = 1 separates the two triangles.
        in_lower = lower_weight_i + lower_weight_j < 1
        per_pair = torch.where(in_lower, lower_triangle, upper_triangle)
        return per_pair.sum(dim=-2)

In [18]:
num_epochs = 1
num_features = 54
num_classes = 7

# 50 control points per axis; the model is moved to the selected device.
model = TestClass(50, num_features, num_classes).to(device)
# Both penalty weights are zero here, so training starts unregularised.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.0, second_order_weight=0.0)
custom_train_loader = CustomDataLoader(x_scaled, y_encoded, validation_size=0.2)

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

Total number of parameters: 27086024


In [19]:
# First training run: Adam with the criterion currently in scope.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

1.945919156074524
1.1770764589309692
1.210916519165039
1.2462069988250732
1.1595715284347534
1.1044795513153076
1.0118178129196167
0.9463739395141602
0.9100254774093628
0.8782057166099548
0.8434998393058777
0.8063315749168396
0.8098554015159607
0.8101276159286499
0.7758947014808655
0.7442722320556641
0.7222808599472046
0.6921284198760986
0.687868058681488
0.6802382469177246
0.6665749549865723
0.6840744018554688
0.6849667429924011
0.6670860648155212
0.656784176826477
0.6431765556335449
0.6231364607810974
0.6186949014663696
0.6202908754348755
Epoch 1, Training Loss: 0.8684232967459677, Validation Loss: 0.6132767861705392
Training Accuracy: 0.7465819293516262, Training F1 Score: 0.7420060761632004
Validation Accuracy: 0.7440169358794524, Validation F1 Score: 0.7393098423062237

0.5947187542915344
0.6017975211143494
0.5961812138557434
0.5988333225250244
0.5991811156272888
0.5837312340736389
0.5873508453369141
0.5738973617553711
0.5841758251190186
0.5838549137115479
0.5811821222305298
0.579

KeyboardInterrupt: 

In [21]:
# Continue training the existing model with a first-order weight of 1e-4.
# A new Adam instance is created, so its moment estimates start from zero.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.0001, second_order_weight=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

0.2037190943956375
0.18504077196121216
0.1927442103624344
0.14395032823085785
0.15158644318580627
0.1775353103876114
0.16200976073741913
0.13824044167995453
0.14004294574260712
0.15475673973560333
0.1569846272468567
0.14436903595924377
0.1377590298652649
0.14164425432682037
0.14849528670310974
0.1413554549217224
0.14253292977809906
0.1399608552455902
0.1461510956287384
0.14380201697349548
0.13670730590820312
0.13578130304813385
0.14299646019935608
0.1382599174976349
0.14186009764671326
0.14161835610866547
0.1399492472410202
0.1421884298324585
0.09030790627002716
Epoch 1, Training Loss: 0.14964660114002348, Validation Loss: 0.27601787311514187
Training Accuracy: 0.9557259003160438, Training F1 Score: 0.9557588612424468
Validation Accuracy: 0.8881698407097923, Validation F1 Score: 0.8878149530824045

0.1318311244249344
0.1310063749551773
0.1295960694551468
0.13749077916145325
0.13598161935806274
0.1360015720129013
0.1356649398803711
0.1338532567024231
0.13426198065280914
0.13573776185512

KeyboardInterrupt: 

In [22]:
# Continue training the existing model with a first-order weight of 1e-3.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.001, second_order_weight=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

0.1276797503232956
0.14556999504566193
0.20644241571426392
0.1409117579460144
0.17248648405075073
0.17779326438903809
0.1460217982530594
0.13883310556411743
0.15842728316783905
0.1570667028427124
0.14353376626968384
0.1381194144487381
0.14764991402626038
0.14673283696174622
0.14023776352405548
0.13124912977218628
0.14091616868972778
0.14633320271968842
0.1445416808128357
0.13608431816101074
0.13309882581233978
0.13844327628612518
0.14376159012317657
0.13384976983070374
0.13702142238616943
0.14154236018657684
0.14169839024543762
0.14018021523952484
0.08784659206867218
Epoch 1, Training Loss: 0.1455321722865009, Validation Loss: 0.27545665412840925
Training Accuracy: 0.9553795214808664, Training F1 Score: 0.9552557786073864
Validation Accuracy: 0.8886087278297462, Validation F1 Score: 0.8880335231782629

0.1258622109889984
0.1275193691253662
0.12638263404369354
0.13241851329803467
0.13277436792850494
0.1342674195766449
0.13350467383861542
0.13033349812030792
0.1313978135585785
0.13407172

KeyboardInterrupt: 

In [24]:
# Same settings as the previous training cell (first-order weight 1e-3).
# NOTE(review): the per-batch losses recorded below are far larger than those
# of the identical cell above; presumably CustomLoss was re-executed in between
# (its cell carries execution count 23) - confirm after a clean Run All.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.001, second_order_weight=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

55.86261749267578
55.520809173583984
54.954681396484375
54.57051467895508
54.140663146972656
53.65220260620117
53.204315185546875
52.801979064941406
52.39165115356445
51.96141052246094
51.5299072265625
51.11573791503906
50.72603988647461
50.33458709716797
49.92927551269531
49.51970291137695
49.1346549987793
48.75727844238281
48.38813781738281
48.00554656982422
47.62041091918945
47.250099182128906
46.8955192565918
46.5379638671875
46.18257522583008
45.826080322265625
45.471988677978516
45.131385803222656
44.74567413330078
Epoch 1, Training Loss: 50.19299698526793, Validation Loss: 0.2724656918504899
Training Accuracy: 0.9489274088926849, Training F1 Score: 0.9486870114025625
Validation Accuracy: 0.889013192430488, Validation F1 Score: 0.888164393811446

44.441123962402344
44.104248046875
43.77143859863281
43.45079040527344
43.12820816040039
42.807090759277344
42.48658752441406
42.16973114013672
41.86003494262695
41.55521011352539
41.25357437133789
40.946128845214844
40.64751052856445
40

KeyboardInterrupt: 

In [25]:
# Continue training with the first-order weight lowered to 1e-4.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.0001, second_order_weight=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

2.163119077682495
2.1859138011932373
2.18501615524292
2.149562358856201
2.1259267330169678
2.123081922531128
2.092665672302246
2.068492889404297
2.0681142807006836
2.0591070652008057
2.0343358516693115
2.013076066970825
2.010281562805176
1.99607253074646
1.9712285995483398
1.957413673400879
1.9536710977554321
1.9387280941009521
1.9248355627059937
1.9052294492721558
1.8937668800354004
1.879279613494873
1.8704906702041626
1.8547483682632446
1.8466651439666748
1.8348311185836792
1.8196868896484375
1.8090397119522095
1.7689783573150635
Epoch 1, Training Loss: 1.9876267404748666, Validation Loss: 0.26002968068230703
Training Accuracy: 0.9336351060328005, Training F1 Score: 0.9336921611671553
Validation Accuracy: 0.8929115427312548, Validation F1 Score: 0.8927845808857737

1.7728846073150635
1.7585972547531128
1.750899076461792
1.7480612993240356
1.7333537340164185
1.72225022315979
1.7126295566558838
1.7008702754974365
1.6901122331619263
1.6822854280471802
1.6755934953689575
1.66052675247192

KeyboardInterrupt: 

In [26]:
# Continue training with the first-order weight lowered to 1e-5.
criterion = CustomLoss(nn.CrossEntropyLoss(), first_order_weight=0.00001, second_order_weight=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

0.26491695642471313
0.30420538783073425
0.2985490560531616
0.2880556881427765
0.31452134251594543
0.2860632836818695
0.2912313938140869
0.28673893213272095
0.2747936248779297
0.2862904667854309
0.28780150413513184
0.28068187832832336
0.27987322211265564
0.28430694341659546
0.2720271050930023
0.27261459827423096
0.27976199984550476
0.27920886874198914
0.2801530659198761
0.2738400995731354
0.27009597420692444
0.2678475081920624
0.2751966714859009
0.2734379768371582
0.2733062505722046
0.27197766304016113
0.27376389503479004
0.27342867851257324
0.2432410717010498
Epoch 1, Training Loss: 0.28039128483799153, Validation Loss: 0.26912713933296534
Training Accuracy: 0.9208492090299456, Training F1 Score: 0.9208786563820484
Validation Accuracy: 0.890467543867198, Validation F1 Score: 0.89021530393757

0.2550717890262604
0.2558390498161316
0.2563392221927643
0.26364362239837646
0.26243874430656433
0.26292532682418823
0.2635178864002228
0.26200568675994873
0.261469304561615
0.26653721928596497
0.

KeyboardInterrupt: 

In [None]:
# Lower the learning rate of the existing optimizer in place, keeping its state.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0001

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=20, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Set the learning rate of the existing optimizer back to 1e-3 in place.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.001

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=20, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam at lr=1e-4, reusing the criterion currently in scope.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam at lr=1e-4, reusing the criterion currently in scope.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam at lr=1e-4, reusing the criterion currently in scope.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam at lr=1e-4, reusing the criterion currently in scope.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Keep training with the current optimizer and criterion, using 1024-row batches.
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=100,
    batch_size=1024,
)

In [None]:
# Time a single epoch with a fresh Adam at lr=1e-4.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=1, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Time a single epoch with Adam's fused implementation enabled.
optimizer = optim.Adam(model.parameters(), lr=0.001, fused=True)

# Drain pending GPU work before starting the clock; skipped on the CPU
# fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=1, batch_size=1024 * 16)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Timed run: fresh Adam at lr=1e-3 with 32 * 1024-row batches.
optimizer = optim.Adam(model.parameters(), lr=0.001)

start_time = time.time()
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=100,
    batch_size=1024 * 32,
)
elapsed_time = time.time() - start_time

print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Fresh Adam at lr=1e-3 with 32 * 1024-row batches (untimed).
optimizer = optim.Adam(model.parameters(), lr=0.001)
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=100,
    batch_size=1024 * 32,
)

In [None]:
# Long run: up to 10000 epochs with a fresh Adam at lr=1e-4.
optimizer = optim.Adam(model.parameters(), lr=0.0001)
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=10000,
    batch_size=1024 * 32,
)

In [None]:
# Long run: up to 10000 epochs with a fresh Adam at lr=1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=10000,
    batch_size=1024 * 32,
)

In [None]:
# Time a single epoch of plain SGD at lr=0.05.
optimizer = optim.SGD(model.parameters(), lr=0.05)

start_time = time.time()
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=1,
    batch_size=1024 * 16,
)
elapsed_time = time.time() - start_time

print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Time a single epoch of Adam at lr=0.01.
optimizer = optim.Adam(model.parameters(), lr=0.01)

start_time = time.time()
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=1,
    batch_size=1024 * 16,
)
elapsed_time = time.time() - start_time

print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Timed run of Adam at lr=0.1.
optimizer = optim.Adam(model.parameters(), lr=0.1)

start_time = time.time()
evaluate_model(
    model,
    custom_train_loader,
    criterion,
    optimizer,
    num_epochs=100,
    batch_size=1024 * 16,
)
elapsed_time = time.time() - start_time

print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# L-BFGS run. The batch size exceeds the 581012 rows reported when the data was
# loaded, so every epoch is a single full-batch step.
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.01)

start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 1000)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# L-BFGS run. The batch size exceeds the 581012 rows reported when the data was
# loaded, so every epoch is a single full-batch step.
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.01)

start_time = time.time()

evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=100, batch_size=1024 * 1000)

elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Drain pending GPU work so it is not attributed to the profiled region;
# skipped on the CPU fallback, where torch.cuda.synchronize() would raise.
if torch.cuda.is_available():
    torch.cuda.synchronize()

# Profile one full-batch epoch of Adam, recording call stacks.
with profiler.profile(with_stack=True, use_device='cuda') as prof:
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    start_time = time.time()

    evaluate_model(model, custom_train_loader, criterion, optimizer, num_epochs=1, batch_size=1024 * 1000)

    elapsed_time = time.time() - start_time
    print(f"Execution time: {elapsed_time:.6f} seconds")

In [None]:
# Top 20 profiled operators ranked by self CUDA time.
prof_averages = prof.key_averages().table(
    sort_by="self_cuda_time_total",
    row_limit=20,
)
print(prof_averages)

In [None]:
# Top 20 profiled operators ranked by self CPU time.
prof_averages = prof.key_averages().table(
    sort_by="self_cpu_time_total",
    row_limit=20,
)
print(prof_averages)

In [None]:
# Inspect the learned grid of the first feature pair for class 0.
print(model.copy_tensor[0:, 0])
print(model.copy_tensor.shape)
print(model.copy_tensor[0:, 0].shape)
tensor_data = model.copy_tensor[0, 0, :].cpu().detach().numpy()

# Plot coordinates from 0 to 1 matching the grid dimensions. They get their own
# names so the feature frame `X` and label series `y` built when the data was
# loaded are not overwritten.
grid_x = np.linspace(0, 1, tensor_data.shape[1])
grid_y = np.linspace(0, 1, tensor_data.shape[0])
mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

# Surface through the grid values (already 2-D, so no flattening is needed).
surf = ax.plot_surface(mesh_x, mesh_y, tensor_data, cmap='viridis', edgecolor='none')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

plt.title('3D Surface Plot of Tensor Data')
plt.colorbar(surf, label='Z values')

plt.show()


In [None]:
# Scatter-plot the class-0 grid of the first feature pairs, one figure per pair.
# At most 45 figures are drawn, and never more than there are pairs.
num_plots = min(45, model.copy_tensor.shape[0])
for i in range(num_plots):
    tensor_data = model.copy_tensor[i, 0, :].cpu().detach().numpy()

    # Plot coordinates from 0 to 1 matching the grid dimensions. They get their
    # own names so the feature frame `X` and label series `y` built when the
    # data was loaded are not overwritten.
    grid_x = np.linspace(0, 1, tensor_data.shape[1])
    grid_y = np.linspace(0, 1, tensor_data.shape[0])
    mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)

    # Flatten coordinates and values for the scatter plot.
    x_flat = mesh_x.flatten()
    y_flat = mesh_y.flatten()
    z_flat = tensor_data.flatten()

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')

    sc = ax.scatter(x_flat, y_flat, z_flat, c=z_flat, cmap='viridis')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    plt.title(f'3D Scatter Plot of Tensor Data - Plot {i+1}')

    plt.colorbar(sc, label='Z values')

    plt.show()
    # Release the figure so dozens of open figures do not pile up in memory.
    plt.close(fig)


In [None]:
# Baseline: scikit-learn logistic regression with default settings, fitted on
# exactly the same train/validation split as the interpolation model.
X_train = custom_train_loader.train_data_tensor.cpu().numpy()
y_train = custom_train_loader.train_labels_tensor.cpu().numpy()
X_val = custom_train_loader.val_data_tensor.cpu().numpy()
y_val = custom_train_loader.val_labels_tensor.cpu().numpy()

log_reg = LogisticRegression()

log_reg.fit(X_train, y_train)

y_train_pred = log_reg.predict(X_train)
y_val_pred = log_reg.predict(X_val)

train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)

train_log_loss = log_loss(y_train, log_reg.predict_proba(X_train))
val_log_loss = log_loss(y_val, log_reg.predict_proba(X_val))

print(f'Training Accuracy: {train_accuracy}')
print(f'Training Log Loss: {train_log_loss}')
print()
print(f'Validation Accuracy: {val_accuracy}')
print(f'Validation Log Loss: {val_log_loss}')