In [26]:
import copy
import random
import time

import numpy as np
import torch
from torch import nn, optim
from torch.optim import SGD, Optimizer
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from easyfsl.samplers import TaskSampler
from easyfsl.utils import sliding_average

from get_processed_data import get_processed_data

### Splitting data

In [15]:
df, X_train, y_train, X_val, y_val, X_test, y_test = get_processed_data()


def _attach_label (features, labels):
    """Return a copy of `features` with `labels` appended as the final 'Fraud' column."""
    labelled = features.copy()
    labelled['Fraud'] = labels
    return labelled


## Keep exactly ONE label column, and keep it last.
    ## FSLDataset (default label_idx = -1) uses the last column as the label and every
    ## other column as a feature. Appending both 'Fraud' and 'Normal' (= 1 - Fraud) left
    ## 'Fraud' among the features, i.e. the label leaked into the model input.
train_df = _attach_label(X_train, y_train)
validation_df = _attach_label(X_val, y_val)
test_df = _attach_label(X_test, y_test)

## Class balance of the training split: fraud (label 1) vs. everything else
ones = sum(1 for label in y_train if label == 1)
zeroes = len(y_train) - ones
print(ones, zeroes)

Training set shape: (12335, 55) (12335,)
Validation set shape: (1542, 55) (1542,)
Test set shape: (1542, 55) (1542,)
724 11611


### Custom dataset object

In [21]:
## torch Dataset that also exposes `get_labels`, the extra method EasyFSL's TaskSampler relies on
class FSLDataset (Dataset):
    """
    Row-wise view of a dataframe for few-shot episodic sampling.

    One column (position `label_idx`, by default the last one) holds the class label;
    EVERY other column is returned as a feature, so the dataframe must not contain
    additional label-derived columns.

    Args:
        dataframe: pandas DataFrame with numeric feature columns and one label column.
        label_idx: Positional index of the label column (negative indices allowed).
        transformation: Optional callable applied to the 1-D feature array of each row.
    """
    def __init__ (self, dataframe, label_idx = -1, transformation = None):
        self.dataframe = dataframe
        self.transformation = transformation
        self.label_idx = label_idx
        print('Disclaimer: It is assumed that the label is the last column of the input dataframe... ...')

    def __len__ (self):
        return len(self.dataframe)

    def __getitem__ (self, index):
        """
        Return `(features, label)` for the row at position `index`.

        `features` is a float32 tensor of all non-label columns. `label` is a plain
        Python int: EasyFSL's `TaskSampler.episodic_collate_fn` accepts labels only as
        an int or a 0-dim integer tensor, so the previous float tensor of shape (1,)
        could not be collated into tasks.
        """
        relevant_row = self.dataframe.iloc[index, :].values
        label = int(relevant_row[self.label_idx])
        ## np.delete drops exactly the label column, whichever position it is in
            ## (the old `row[:label_idx]` slice was only correct for label_idx == -1)
        features = np.delete(relevant_row, self.label_idx).astype(np.float32)

        if self.transformation is not None:
            features = self.transformation(features)

        return torch.as_tensor(features, dtype = torch.float32), label

    ## Required to use EasyFSL methods; returns the label of every row, in row order
    def get_labels (self):
        """Return the dataset's labels as a list of ints (same values `__getitem__` yields)."""
        label_column = self.dataframe.iloc[ : , self.label_idx]

        return [int(label) for label in label_column]

### Prototypical network

In [7]:
class PrototypicalNetwork (nn.Module):
    """
    Prototypical network: classifies query samples by their distance to class prototypes.

    A prototype is the mean backbone embedding of the support samples of one class.

    Args:
        backbone: Module mapping a batch of inputs to a batch of embeddings
            (one embedding vector per input row).
    """
    def __init__(self, backbone: nn.Module):
        super(PrototypicalNetwork, self).__init__()
        self.backbone = backbone

    ## Predict query labels using labelled support data
    def forward (self, support_data: torch.Tensor, support_labels: torch.Tensor, 
                 query_data: torch.Tensor) -> torch.Tensor:
        """
        Score every query sample against every class prototype.

        Args:
            support_data: Batch of labelled support samples.
            support_labels: Label of each support sample; shape (n_support,) or (n_support, 1).
            query_data: Batch of samples to classify.

        Returns:
            Tensor of shape (n_query, n_classes) holding negative euclidean distances.
            Column j belongs to the j-th smallest label present in `support_labels`;
            when labels are 0 .. n_way - 1 (as in EasyFSL episodes) column j is class j.
        """
        ## Extract features / embedding of support and query data (using backbone)
            ## Call the module itself, not `.forward`, so registered hooks are run
        z_support = self.backbone(support_data)
        z_query = self.backbone(query_data)

        ## Infer the classes from the support set labels
            ## Flatten first: a (n_support, 1) label tensor would otherwise mis-index z_support
        support_labels = support_labels.flatten()
        class_ids = torch.unique(support_labels) ## Sorted ascending

        ## Construct prototypes
            ## Prototype i = Mean of embeddings of all support data with label i
            ## Iterating over the labels actually present avoids NaN prototypes from empty classes
        z_proto = torch.stack(
            [
                z_support[support_labels == class_id].mean(dim = 0) \
                    for class_id in class_ids
            ]
        )

        ## Compute euclidean distance from query data to prototypes, and classification scores
        dists = torch.cdist(z_query, z_proto)
        classification_scores = -dists ## Smaller distance -> Higher score

        return classification_scores ## To be compared to actual query labels

### Model training (meta-learning / episodic training)

Episodic training simulates the few-shot learning scenario to train a prototypical network. Training data is organized into episodes that resemble few-shot tasks.

Training loop

In [30]:
## Represents one epoch of episodic training: every task (episode) from the loader
    ## triggers one optimizer step on the same model
def training_epoch (model, data_loader: DataLoader, optimizer: Optimizer, loss_fn):
    """
    Train `model` for one epoch and return the mean task loss.

    Args:
        model: Module called as `model(support_data, support_labels, query_data)`
            and returning classification scores for the query samples.
        data_loader: Iterable with `len()`, yielding 5-tuples
            `(support_data, support_labels, query_data, query_labels, _)`.
        optimizer: Optimizer over the model's parameters.
        loss_fn: Callable `loss_fn(classification_scores, query_labels)` returning a scalar tensor.

    Returns:
        Mean loss over the epoch's tasks as a Python float (NaN if the loader is empty).
    """
    total_loss = 0.0
    n_tasks_seen = 0
    model.train()

    with tqdm(enumerate(data_loader), total = len(data_loader)) as tqdm_train:
        ## For each task, make prediction, calculate loss, update model params
        for task_idx, (support_data, support_labels, query_data, query_labels, _) in tqdm_train:

            optimizer.zero_grad()
            ## Call the module itself, not `.forward`, so registered hooks are run
            classification_scores = model(support_data, support_labels, query_data)

            task_loss = loss_fn(classification_scores, query_labels)
            task_loss.backward()
            optimizer.step()

            ## `.item()` stores a plain float; keeping the tensor would retain each task's graph
            total_loss += task_loss.item()
            n_tasks_seen += 1

            tqdm_train.set_postfix(loss = total_loss / n_tasks_seen)

    return total_loss / n_tasks_seen if n_tasks_seen else float('nan')


## Accuracy of the model's query predictions over every task of a loader
def evaluate (model, data_loader: DataLoader):
    """
    Return the fraction of query samples classified correctly over all tasks.

    The model is switched to eval mode and run without gradient tracking. The predicted
    class of a query sample is the column with the highest classification score.

    Args:
        model: Module called as `model(support_data, support_labels, query_data)`.
        data_loader: Iterable yielding 5-tuples
            `(support_data, support_labels, query_data, query_labels, _)`.

    Returns:
        Accuracy in [0, 1] as a Python float (0.0 if the loader yields no query samples).
    """
    correct_predictions = 0
    total_predictions = 0
    model.eval()

    with torch.no_grad():
        for support_data, support_labels, query_data, query_labels, _ in data_loader:
            classification_scores = model(support_data, support_labels, query_data)
            predicted_labels = classification_scores.argmax(dim = 1)

            correct_predictions += (predicted_labels == query_labels).sum().item()
            total_predictions += len(query_labels)

    return correct_predictions / total_predictions if total_predictions else 0.0

Set up

In [8]:
## Seed every RNG in play (numpy, torch, python stdlib) with the same value
random_seed = 0
for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
    seed_rng(random_seed)

In [24]:
## Shape of one task: classes per task, support samples per class, query samples per class
N_WAY = 2
N_SHOT = 5
N_QUERY = 10

## Number of tasks sampled per training epoch / per validation pass
N_TASKS_PER_EPOCH = 100 
N_VALIDATION_TASKS = 100 

## Datasets need to be a FewShotDataset / torch Dataset with .get_labels
train_set = FSLDataset(train_df)
validation_set = FSLDataset(validation_df)

## Samplers used to generate tasks; both splits share the same task shape
task_shape = dict(n_way = N_WAY, n_shot = N_SHOT, n_query = N_QUERY)
train_sampler = TaskSampler(dataset = train_set, n_tasks = N_TASKS_PER_EPOCH, **task_shape)
validation_sampler = TaskSampler(dataset = validation_set, n_tasks = N_VALIDATION_TASKS, **task_shape)

## Loaders turn (dataset, sampler) into an iterable of tasks;
    ## each sampler's episodic_collate_fn assembles the sampled rows into one task
train_loader = DataLoader(
    dataset = train_set,
    batch_sampler = train_sampler,
    pin_memory = True,
    collate_fn = train_sampler.episodic_collate_fn,
)
validation_loader = DataLoader(
    dataset = validation_set,
    batch_sampler = validation_sampler,
    pin_memory = True,
    collate_fn = validation_sampler.episodic_collate_fn,
)

Disclaimer: It is assumed that the label is the last column of the input dataframe... ...
Disclaimer: It is assumed that the label is the last column of the input dataframe... ...


Initializing the loss function, learning-rate schedule, optimizer, and TensorBoard writer

In [None]:
## Loss fn
    ## Applied to the (n_query, n_classes) classification scores and the query labels
LOSS_FN = nn.CrossEntropyLoss()


## Scheduler
    ## Scales learning rate by gamma at the designated milestones (epoch indices)
scheduler_milestones = [120, 160]
scheduler_gamma = 0.1


## Model
    ## Previously `model = None`, so `model.parameters()` below raised AttributeError.
    ## Baseline backbone: a small MLP embedding each row; the layer sizes are a starting
    ## point to tune, not a validated choice.
N_FEATURES = train_set[0][0].numel() ## Input width = length of one sample's feature tensor
HIDDEN_DIM = 64
EMBEDDING_DIM = 32
backbone = nn.Sequential(
    nn.Linear(N_FEATURES, HIDDEN_DIM),
    nn.ReLU(),
    nn.Linear(HIDDEN_DIM, EMBEDDING_DIM),
)
model = PrototypicalNetwork(backbone)


## Optimizer
LEARNING_RATE = 0.001
MOMENTUM = 0.9
DECAY = 5e-4
train_optimizer = optim.SGD(model.parameters(), lr = LEARNING_RATE, momentum = MOMENTUM, 
                            weight_decay = DECAY)
train_scheduler = MultiStepLR(optimizer = train_optimizer, milestones = scheduler_milestones,
                              gamma = scheduler_gamma)


## Writer
    ## TensorBoard event files are written under `log_dir`
log_dir = 'fsl_logs'
tb_writer = SummaryWriter(log_dir = log_dir)


Train the model

In [None]:
N_EPOCHS = 200
log_update_frequency = 10 ## NOTE(review): not referenced by this cell — confirm it is still needed

## Track best parameters (weights and biases) and performance of model
    ## state_dict() returns references to the live tensors, which later optimizer steps
    ## overwrite in place; deep-copy so the snapshot really is the best epoch's weights
best_state = copy.deepcopy(model.state_dict())
best_validation_accuracy = 0.0

for epoch in range(N_EPOCHS):
    print(f'Epoch: {epoch}')
    
    ## training_epoch requires the loss function as its fourth argument
    average_epoch_loss = training_epoch(model, train_loader, train_optimizer, LOSS_FN)
    validation_accuracy = evaluate(model, validation_loader) 

    if validation_accuracy > best_validation_accuracy:
        best_validation_accuracy = validation_accuracy
        best_state = copy.deepcopy(model.state_dict())
        print("Ding ding ding! We found a new best model!")

    tb_writer.add_scalar("Train/loss", average_epoch_loss, epoch)
    tb_writer.add_scalar("Val/acc", validation_accuracy, epoch)

    ## Update the scheduler such that it knows when to adjust the learning rate
    train_scheduler.step()

## Push any buffered scalars to disk now that training is over
tb_writer.flush()


## Retrieve the best model
missing_keys, unexpected_keys = model.load_state_dict(best_state)


### Model evaluation

In [None]:
## Build the held-out test tasks the same way as the train / validation ones
    ## (`test_loader` was used here without ever being created)
N_TEST_TASKS = 100
test_set = FSLDataset(test_df)
test_sampler = TaskSampler(dataset = test_set, n_way = N_WAY, n_shot = N_SHOT,
                           n_query = N_QUERY, n_tasks = N_TEST_TASKS)
test_loader = DataLoader(dataset = test_set, batch_sampler = test_sampler, pin_memory = True,
                         collate_fn = test_sampler.episodic_collate_fn)

## `evaluate(model, loader)` must be defined in an earlier cell
test_accuracy = evaluate(model, test_loader)
test_accuracy