In [2]:
import main as a
import pickle as pkl
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
from importlib import reload
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
import pandas as pd

In [4]:
with open("data/tadpole/X.pkl", "rb") as file:
    X = pkl.load(file)
early = False
with open("data/tadpole/y.pkl", "rb") as file:
    if early:
        y = pkl.load(file)[:,1:]
    else:
        y = pkl.load(file)[:,:-1]

In [5]:
X = X.reshape(X.shape[0]*X.shape[1], X.shape[2])
y = y.flatten()
weight = torch.tensor(compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y), dtype=torch.float32)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, stratify=y_train, test_size=0.1, random_state=42)

In [63]:
reload(a)

<module 'main' from '/Users/aviadsusman/Documents/Python_Projects/FLA_2/FLA/main.py'>

In [64]:
train_dataset = a.npDataset(X_train,y_train)
test_dataset = a.npDataset(X_test,y_test)
val_dataset = a.npDataset(X_val,y_val)
batch_size = 100

train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [65]:
hidden_dims = [160,40,10]
attn_heads = 1
model = a.FLANN(input_dim=337, hidden_dims=hidden_dims, output_dim=3, attn_heads=attn_heads, activation=nn.ReLU())

In [66]:
criterion = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
metric = a.MacroF1Score(num_classes=3)

In [67]:
for x in model.flas[0].parameters():
    print(x)

Parameter containing:
tensor([0.1108], requires_grad=True)
Parameter containing:
tensor([0.0725], requires_grad=True)
Parameter containing:
tensor([0.6380], requires_grad=True)
Parameter containing:
tensor([0.6316], requires_grad=True)
Parameter containing:
tensor([0.3881], requires_grad=True)
Parameter containing:
tensor([0.4458], requires_grad=True)


In [68]:
num_epochs = 500
best_val_loss = float('inf')
best_model = None
patience = 10
early_stop_counter = 0
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    model.eval()
    val_losses = []
    for inputs, labels in val_loader:
        with torch.no_grad():
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            val_losses.append(val_loss.item())
    
    avg_val_loss = np.mean(val_losses)
    print(f'Epoch {epoch+1}, Validation Loss: {avg_val_loss:.4f}')
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_model = model.state_dict()
        early_stop_counter = 0
    else:
        early_stop_counter += 1
    
    if early_stop_counter >= patience:
        print(f'Early stopping after epoch {epoch+1} with validation loss {best_val_loss:.4f}')
        break

model.load_state_dict(best_model)
model.eval()

0.010940074920654297 337
0.0029408931732177734 160
0.0007979869842529297 40
0.009449958801269531 337
0.002869129180908203 160
0.00022029876708984375 40
0.009325027465820312 337
0.0026140213012695312 160
0.00018167495727539062 40
0.007921934127807617 337
0.002045869827270508 160
0.0001671314239501953 40
0.006518125534057617 337
0.0015680789947509766 160
0.00012302398681640625 40
0.0068590641021728516 337
0.001667022705078125 160
0.00012612342834472656 40
0.0057830810546875 337
0.0014879703521728516 160
0.00025391578674316406 40
0.0058820247650146484 337
0.0015850067138671875 160
0.00015306472778320312 40
0.0060389041900634766 337
0.0013899803161621094 160
0.00019288063049316406 40
0.005957126617431641 337
0.0019369125366210938 160
0.00037288665771484375 40
0.005857944488525391 337
0.001219034194946289 160
0.000164031982421875 40
0.0065920352935791016 337
0.0016820430755615234 160
0.0001418590545654297 40
0.0061109066009521484 337
0.0012211799621582031 160
0.00013685226440429688 40
0.005

FLANN(
  (activation): ReLU()
  (linears): ModuleList(
    (0): Linear(in_features=337, out_features=160, bias=True)
    (1): Linear(in_features=160, out_features=40, bias=True)
    (2): Linear(in_features=40, out_features=10, bias=True)
  )
  (linear_norms): ModuleList(
    (0): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((40,), eps=1e-05, elementwise_affine=True)
    (2): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
  )
  (flas): ModuleList(
    (0-2): 3 x FLAttention(
      (alphas): ParameterDict(
          (query_0): Parameter containing: [torch.FloatTensor of size 1]
          (key_0): Parameter containing: [torch.FloatTensor of size 1]
          (value_0): Parameter containing: [torch.FloatTensor of size 1]
      )
      (betas): ParameterDict(
          (query_0): Parameter containing: [torch.FloatTensor of size 1]
          (key_0): Parameter containing: [torch.FloatTensor of size 1]
          (value_0): Parameter containing: [torch.FloatTe

In [69]:
test_losses = []
test_predictions = []
test_true_labels = []

for inputs, labels in test_loader:
    with torch.no_grad():
        outputs = model(inputs)
        test_loss = criterion(outputs, labels)
        test_losses.append(test_loss.item())
        predictions = torch.argmax(outputs, dim=1)
        test_predictions.extend(predictions.cpu().numpy())
        test_true_labels.extend(labels.cpu().numpy())

avg_test_loss = np.mean(test_losses)
test_score = f1_score(test_true_labels, test_predictions, average='weighted')
print(f'Test Loss: {avg_test_loss:.4f}, Test Score: {test_score:.4f}, Predicted Proba: {1/np.exp(avg_test_loss):.4f}')

0.007419109344482422 337
0.0022988319396972656 160
0.00013303756713867188 40
0.007727146148681641 337
0.0019152164459228516 160
0.0002319812774658203 40
0.007494211196899414 337
0.00133514404296875 160
0.0001728534698486328 40
0.006352901458740234 337
0.0015790462493896484 160
0.00010704994201660156 40
0.006112098693847656 337
0.001466989517211914 160
0.0001049041748046875 40
0.006390094757080078 337
0.0015370845794677734 160
0.00018715858459472656 40
Test Loss: 0.5382, Test Score: 0.8750, Predicted Proba: 0.5838


In [70]:
for x in model.flas[0].parameters():
    print(x)

Parameter containing:
tensor([0.1070], requires_grad=True)
Parameter containing:
tensor([0.1401], requires_grad=True)
Parameter containing:
tensor([0.7694], requires_grad=True)
Parameter containing:
tensor([0.5956], requires_grad=True)
Parameter containing:
tensor([0.4241], requires_grad=True)
Parameter containing:
tensor([0.4487], requires_grad=True)
