In [1]:
from models.AlexNet import AlexNetMNIST, AlexNetMNISTee1, AlexNetMNISTee2
from models.Branchynet import Branchynet

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import datasets, transforms
import torchvision
import matplotlib

import os
import numpy as np
from datetime import datetime as dt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
def count_parameters(model):
    params = [p.numel() for p in model.parameters() if p.requires_grad]
    for item in params:
        print(f'{item:>6}')
    print(f'______\n{sum(params):>6}')

In [2]:
transform = transforms.ToTensor()

batch_size = 600

train_data   = datasets.FashionMNIST(root='../data', train=True, download=True, transform=transform)
test_data    = datasets.FashionMNIST(root='../data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [7]:
model = AlexNetMNIST().to(device)
print(model)

AlexNetMNIST(
  (layer1): Sequential(
    (0): Conv2d(1, 96, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer5):

In [None]:
for i, (X_train, y_train) in enumerate(train_data):
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    break

x = X_train.view(1,1,28,28)
print(x.shape)
x = model.layer1(x)
print(x.shape)
x = model.layer2(x)
print(x.shape)
x = model.layer3(x)
print(x.shape)

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

import time
start_time = time.time()

epochs = 5
train_losses = []
test_losses = []
train_correct = []
test_correct = []

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    tst_cnt  = 0
    
    # Run the training batches
    for b, (X_train, y_train) in enumerate(train_loader):
        X_train = X_train.to(device)
        y_train = y_train.to(device)
        b += 1
        
        # Apply the model
        y_pred = model(X_train)  # we don't flatten X-train here
        loss = criterion(y_pred, y_train)
 
        # Tally the number of correct predictions
        predicted = torch.max(y_pred.data, 1)[1]
        batch_corr = (predicted == y_train).sum()
        trn_corr += batch_corr
        
        # Update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Print interim results
        if (b-1)%10 == 0:
            print(f'epoch: {i:2}  batch: {b:4} [{batch_size*b:6}/60000]  loss: {loss.item():10.8f}  \
accuracy train: {trn_corr.item()*100/(batch_size*b):7.3f}%')
        
    train_losses.append(loss)
    train_correct.append(trn_corr)
        
    # Run the testing batches
    with torch.no_grad():
        for b, (X_test, y_test) in enumerate(test_loader):
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            # Apply the model
            y_val = model(X_test)

            # Tally the number of correct predictions
            predicted = torch.max(y_val.data, 1)[1] 
            tst_corr += (predicted == y_test).sum()
            tst_cnt  += len(predicted)

    print(f"Accuracy test: {100*tst_corr/tst_cnt:2.2f}%")
        
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed  

epoch:  0  batch:    1 [   600/60000]  loss: 2.34583187  accuracy train:  10.667%
epoch:  0  batch:   11 [  6600/60000]  loss: 1.41046405  accuracy train:  29.318%
epoch:  0  batch:   21 [ 12600/60000]  loss: 0.93356800  accuracy train:  43.921%
epoch:  0  batch:   31 [ 18600/60000]  loss: 0.76334333  accuracy train:  52.134%
epoch:  0  batch:   41 [ 24600/60000]  loss: 0.73419976  accuracy train:  57.114%
epoch:  0  batch:   51 [ 30600/60000]  loss: 0.57557380  accuracy train:  60.624%
epoch:  0  batch:   61 [ 36600/60000]  loss: 0.56638235  accuracy train:  63.232%
epoch:  0  batch:   71 [ 42600/60000]  loss: 0.53182906  accuracy train:  65.322%
epoch:  0  batch:   81 [ 48600/60000]  loss: 0.53560692  accuracy train:  67.082%
epoch:  0  batch:   91 [ 54600/60000]  loss: 0.48125726  accuracy train:  68.522%
Accuracy test: 80.02%
epoch:  1  batch:    1 [   600/60000]  loss: 0.58004326  accuracy train:  78.667%
epoch:  1  batch:   11 [  6600/60000]  loss: 0.44685999  accuracy train:  81

In [15]:
x = {
    nome: "Joao"
}
x['nome']

NameError: name 'nome' is not defined