# Sequential MNIST

## Imports

In [1]:
import random

import torch
import torch.nn as nn

import numpy as np

import wandb

from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

from datasets.SequentialMNIST import SequentialMNIST
from utils.contexts import evaluating
from fptt.utils.config import get_cfg
from utils.train import train
from models.LSTMWithLinearLayer import LSTMWithLinearLayer

## Configure device

In [2]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Echo the selected device so the choice is recorded in the cell output.
print(f'Device: {device}')

Device: cpu


## Configure parameters

In [3]:
# Experiment configuration, grouped by concern. The whole mapping is handed to
# `wandb.init(config=...)` further down; the data loaders and `create_net` read
# their batch sizes and layer sizes from it directly.
config = dict(
    # Loss function, referenced by name.
    criterion=dict(name="CrossEntropyLoss"),
    # Network dimensions consumed by `create_net`.
    model=dict(
        hidden_size=128,
        input_size=1,
        name="linear_LSTM",
        output_size=10,
    ),
    # Optimizer hyper-parameters.
    optimizer=dict(
        grad_clip_value=1.0,
        lr=0.001,
        momentum=0.9,
        name="SGD",
        weight_decay=0.0,
    ),
    # NOTE(review): T=784 presumably is the sequence length (28 * 28 pixels fed
    # one per step, matching input_size=1) - confirm against SequentialMNIST.
    task=dict(T=784, name="sequential_mnist"),
    # Batch size of the test data loader.
    test=dict(batch_size=1024),
    # Batch size of the training data loader and the number of epochs.
    train=dict(batch_size=100, n_epochs=100),
)


## Load data

### Set seeds

In [4]:
SEED = 0

# Seed each random number generator used by the notebook with the same value,
# in the order PyTorch -> NumPy -> Python's `random`, so runs are repeatable.
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(SEED)


### Setup

In [5]:
# Normalisation constants applied to every image.
# NOTE(review): these look like the commonly quoted MNIST training-set pixel
# mean / standard deviation - confirm the source if exact values matter.
MNIST_MEAN = 0.1307
MNIST_STD = 0.3081

# Preprocessing pipeline shared by the train and test datasets:
# convert the image to a tensor, then standardise its single channel.
mnist_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((MNIST_MEAN,), (MNIST_STD,)),
    ]
)


### Train

In [6]:
# Training split of MNIST (downloaded into the current directory when missing),
# preprocessed with `mnist_transforms` and wrapped in the project's
# SequentialMNIST dataset.
train_data = SequentialMNIST(
    torchvision.datasets.MNIST('.', train=True, download=True, transform=mnist_transforms)
)

# shuffle=True re-draws the sample order at the start of every epoch, so SGD
# does not see the mini-batches in the same fixed order each time. The order
# is still reproducible: with no explicit generator the sampler draws from
# PyTorch's global RNG, which is seeded in the "Set seeds" cell.
train_data_loader = DataLoader(
    train_data,
    batch_size=config['train']['batch_size'],
    shuffle=True,
)

### Test

In [7]:
# Held-out MNIST split (train=False), fetched into the current directory when
# missing and run through the same preprocessing as the training data.
test_data = SequentialMNIST(
    torchvision.datasets.MNIST(
        root='.',
        train=False,
        download=True,
        transform=mnist_transforms,
    )
)

# Evaluation batches are served in dataset order (no shuffling requested).
test_data_loader = DataLoader(
    dataset=test_data,
    batch_size=config['test']['batch_size'],
)

## Train model(s)

In [8]:
# Loss function handed to `train`; the same class that
# config['criterion']['name'] names.
criterion = nn.CrossEntropyLoss()


def create_net():
    """Construct a new LSTMWithLinearLayer from the sizes in ``config['model']``.

    Kept as a single factory so that single-instance training, sweeps and
    test-run visualization all build the network the same way. The config is
    looked up on every call, so each call returns a separate model instance
    that reflects the values ``config`` holds at that moment.

    Returns:
        LSTMWithLinearLayer: built with ``input_size``, ``hidden_size`` and
        ``output_size`` passed positionally, in that order.
    """
    model_cfg = config['model']
    return LSTMWithLinearLayer(
        model_cfg['input_size'],
        model_cfg['hidden_size'],
        model_cfg['output_size'],
    )

### Single configuration

Run the cell below to train a model with the configuration specified in the `config` dictionary. Set `PROJECT` and `ENTITY` to your own Weights & Biases project and account name first.

In [None]:
# Placeholder Weights & Biases identifiers - replace with a real project and
# account/team name before running this cell.
PROJECT, ENTITY = "project_name", "user_name"

# Train inside a W&B run that receives the full `config` mapping. Using the run
# as a context manager scopes it to this block.
with wandb.init(config=config, entity=ENTITY, project=PROJECT):
    train(
        create_net(),       # newly constructed network
        train_data_loader,
        test_data_loader,
        criterion,
        device,
    )