In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import MultiStepLR
import torchvision
import matplotlib.pyplot as plt
import dataset
from model import ResNet18YOLOv1
from loss import YOLOv1Loss
from tqdm.auto import tqdm

# About
This is an implementation of YOLOv1 from ***You Only Look Once: Unified, Real-Time Object Detection by Joseph Redmon, Santosh Divvala, Ross Girshick, and Ali Firhadi.*** Object detection is figuring out what objects are in an image and where they are. Another way to look at this problem is how can we write a computer program that draws bounding boxes around objects and predicts what kind of objects they are. YOLO solves this problem and does it super fast, like state of the art fast!

Let's talk about R-CNN, the predecessor to YOLO. It proposed regions, ran a classifier on every region, and did some post-processing to produce the final result. In simple language this translates to:
1. Lemme draw a lot of bounding boxes where I think objects are
2. Lemme figure out what are in the bounding boxes I drew
3. Ok, I drew too many bounding boxes, lemme remove most of them and keep the important ones

This is a lot of steps. What YOLO does instead is ***unified detection***. Unified detection combines the different components of object detection (where are the objects and what kind of objects are they) into one Convolutional Neural Network. You give it an image and in one swoop, it tells you exactly that.

Here's how it does it:
1. Divide the image into a SxS grid
2. Each cell in the grid predicts B bounding boxes and C class probabilities (what it thinks the object is)

We represent bounding boxes with 5 numbers: x, y, w, h, p.
- (x, y): center of the bounding box
- w: width
- h: height
- p: confidence (a measure of how confident we are that this box captures an object and matches the ground truth)

Accordingly, YOLOv1 produces a SxSx(5B+C) tensor. Each cell predicts B bounding boxes, how do we choose which one is the "true" predictor? How do we measure how good our bounding box and classification predictions are? 

We check which bounding box has the greatest overlap (IOU: Intersection Over Union) with the ground truth and choose that one as a predictor. We use this loss function to measure the "goodness" of our predictions:

![yolo loss function](https://i.stack.imgur.com/IddFu.png)

On a high level, it is the squared error between our prediction and the ground truth. 

# PASCAL VOC 2007 Dataset

PASCAL VOC Detection Dataset contains annotated images with 20 labelled classes and bounding boxes. There are 2,501 images in the training set, 2,510 images in the validation set, and 4,952 images in the test set.

In [2]:
# original dataset
pascal_voc_train = torchvision.datasets.VOCDetection(
    root="data",
    year="2007",
    image_set="train",
    download=False
)

pascal_voc_val = torchvision.datasets.VOCDetection(
    root="data",
    year="2007",
    image_set="val",
    download=False
)

pascal_voc_test = torchvision.datasets.VOCDetection(
    root="data",
    year="2007",
    image_set="test",
    download=False
)

In [3]:
# augment dataset for YOLOv1: resize and normalize image and convert bounding boxes from annotations to tensors
voc_train = dataset.PascalVOC(pascal_voc=pascal_voc_train)
voc_val = dataset.PascalVOC(pascal_voc=pascal_voc_val)
voc_test = dataset.PascalVOC(pascal_voc=pascal_voc_test)

TRANSFORMING PASCAL VOC
TRANSFORMING PASCAL VOC
TRANSFORMING PASCAL VOC


In [4]:
BATCH_SIZE = 64

In [5]:
# train_dataloader = DataLoader(voc_train, batch_size=BATCH_SIZE, shuffle=True)
# val_dataloader = DataLoader(voc_val, batch_size=BATCH_SIZE, shuffle=True)
# test_dataloader = DataLoader(voc_test, batch_size=BATCH_SIZE, shuffle=True)

train_dataloader = DataLoader(voc_train, batch_size=BATCH_SIZE, shuffle=False)
val_dataloader = DataLoader(voc_val, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(voc_test, batch_size=BATCH_SIZE, shuffle=False)

# Training

## Device

In [6]:
DEVICE = "cpu"

if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    DEVICE = torch.device("mps")

DEVICE

device(type='mps')

## Hyperparameters
- S: dimensions of SxS grid
- B: number of bounding boxes predicted per cell
- C: number of classes

In [7]:
S = 7
B = 2
C = 20
lambda_coord = 5.0
lambda_noobj = 0.5

## Model
ResNet18 convolutional layers pretrained on ImageNet with 2 feedforward layers outputting a (N x S x S x (5B + C)) tensor.

In [81]:
yolo = ResNet18YOLOv1(S=S, B=B, C=C).to(DEVICE)

## Loss + Optimizer
![yolo loss function](https://i.stack.imgur.com/IddFu.png)

We use stochastic gradient descent with a learning rate of 1e-3, weight decay (L2 regularization) of 0.0005, and momentum of 0.9.

In [82]:
yolo_loss = YOLOv1Loss(S=S, B=B, C=C, lambda_coord=lambda_coord, lambda_noobj=lambda_noobj)

## Train
Train model with a learning rate of 1e-3 for the first few epochs, raise learning rate to 1e-2 and train for 75 epochs, then lower to 1e-3 for 30 epochs, and finally 1e-4 for 30 epochs.

We train the network for about 135 epochs.

In [83]:
def evaluate_loss(model, criterion, dataloader):
    total_loss = 0
    model.eval()
    
    with torch.no_grad():
        for X, Y in dataloader:
            X = X.to(DEVICE)
            Y = Y.to(DEVICE)

            pred = model(X)
            loss = criterion(pred, Y)
            total_loss += loss.item()
            break
            
    N = len(dataloader)
    # loss = total_loss / N
    loss = total_loss
    
    return loss

In [84]:
torch.cuda.empty_cache()

In [102]:
FIRST_EPOCHS = 3
EPOCHS = 100
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005

def train_yolo(model, criterion, train_dataloader, val_dataloader):
    pre_optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM)
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=WEIGHT_DECAY)
    # scheduler = MultiStepLR(optimizer,
    #                         milestones=[3, 6],
    #                         gamma=0.1)
    
    train_losses = []
    val_losses = []
    
    N = len(train_dataloader)
    
    # pretrain lr=1e-3
#     for epoch in range(FIRST_EPOCHS):
#         model.train()
#         total_loss = 0
#         for i, (X, Y) in enumerate(tqdm(train_dataloader, leave=False, desc=f"Pretrain Epoch [{epoch+1}/{FIRST_EPOCHS}]")):
#             X = X.to(DEVICE)
#             Y = Y.to(DEVICE)
            
#             pred = model(X)
#             loss = criterion(pred, Y)
#             total_loss += loss.item()
            
#             # backprop
#             pre_optimizer.zero_grad()
#             loss.backward()
#             pre_optimizer.step()
        
#         # loss = total_loss
#         loss = total_loss / N
#         print(f"Pretrain Epoch [{epoch+1}/{FIRST_EPOCHS}]: Train Loss={loss}")
            
    # train
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        lr = optimizer.param_groups[0]["lr"]
        for i, (X, Y) in enumerate(tqdm(train_dataloader, leave=False, desc=f"Epoch [{epoch+1}/{EPOCHS}]: lr={lr}")):
            X = X.to(DEVICE)
            Y = Y.to(DEVICE)
            
            # print(X)
            
            pred = model(X)
            
            # print(pred)
            loss = criterion(pred, Y)
            # print(loss)
            total_loss += loss.item()
            
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            break
            
        # train_loss = total_loss / N
        train_loss = total_loss
        train_losses.append(train_loss)
        
        # update learning rate
        # scheduler.step()
        
        # evaluate on validation set
        val_loss = evaluate_loss(model, criterion, val_dataloader)
        val_losses.append(val_loss)
        
        print(f"Epoch [{epoch+1}/{EPOCHS}]: Train Loss={train_loss}, Val Loss={val_loss}")
    
    return train_losses, val_losses

In [103]:
train_losses, val_losses = train_yolo(yolo,
           yolo_loss,
           train_dataloader=train_dataloader,
           val_dataloader=val_dataloader)

Epoch [1/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/100]: Train Loss=1.4226999282836914, Val Loss=9.295550346374512


Epoch [2/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [2/100]: Train Loss=1.381443977355957, Val Loss=9.30438232421875


Epoch [3/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [3/100]: Train Loss=1.4423173666000366, Val Loss=9.318083763122559


Epoch [4/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [4/100]: Train Loss=1.348246693611145, Val Loss=9.326096534729004


Epoch [5/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [5/100]: Train Loss=1.3324466943740845, Val Loss=9.319059371948242


Epoch [6/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [6/100]: Train Loss=1.4454624652862549, Val Loss=9.295692443847656


Epoch [7/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [7/100]: Train Loss=1.4130418300628662, Val Loss=9.30004596710205


Epoch [8/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [8/100]: Train Loss=1.3122279644012451, Val Loss=9.309688568115234


Epoch [9/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [9/100]: Train Loss=1.399667739868164, Val Loss=9.318323135375977


Epoch [10/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [10/100]: Train Loss=1.4317872524261475, Val Loss=9.340909004211426


Epoch [11/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [11/100]: Train Loss=1.3432748317718506, Val Loss=9.341748237609863


Epoch [12/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [12/100]: Train Loss=1.4162096977233887, Val Loss=9.34377384185791


Epoch [13/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [13/100]: Train Loss=1.3617572784423828, Val Loss=9.335241317749023


Epoch [14/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [14/100]: Train Loss=1.3078458309173584, Val Loss=9.340514183044434


Epoch [15/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [15/100]: Train Loss=1.3257224559783936, Val Loss=9.31801986694336


Epoch [16/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [16/100]: Train Loss=1.315994143486023, Val Loss=9.31915283203125


Epoch [17/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [17/100]: Train Loss=1.3073451519012451, Val Loss=9.31729793548584


Epoch [18/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [18/100]: Train Loss=1.3174796104431152, Val Loss=9.332795143127441


Epoch [19/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [19/100]: Train Loss=1.346002221107483, Val Loss=9.360315322875977


Epoch [20/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [20/100]: Train Loss=1.2276105880737305, Val Loss=9.356961250305176


Epoch [21/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [21/100]: Train Loss=1.343733787536621, Val Loss=9.31737995147705


Epoch [22/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [22/100]: Train Loss=1.1791801452636719, Val Loss=9.3945894241333


Epoch [23/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [23/100]: Train Loss=1.1482956409454346, Val Loss=9.424966812133789


Epoch [24/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [24/100]: Train Loss=1.2656604051589966, Val Loss=9.450223922729492


Epoch [25/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [25/100]: Train Loss=1.159764289855957, Val Loss=9.45606803894043


Epoch [26/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [26/100]: Train Loss=1.228255271911621, Val Loss=9.426955223083496


Epoch [27/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [27/100]: Train Loss=1.276000738143921, Val Loss=9.412359237670898


Epoch [28/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [28/100]: Train Loss=1.1319864988327026, Val Loss=9.406501770019531


Epoch [29/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [29/100]: Train Loss=1.2665059566497803, Val Loss=9.401747703552246


Epoch [30/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [30/100]: Train Loss=1.0919573307037354, Val Loss=9.402929306030273


Epoch [31/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [31/100]: Train Loss=1.2031621932983398, Val Loss=9.38081169128418


Epoch [32/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [32/100]: Train Loss=1.0976206064224243, Val Loss=9.369421005249023


Epoch [33/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [33/100]: Train Loss=1.1149710416793823, Val Loss=9.346834182739258


Epoch [34/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [34/100]: Train Loss=1.1445069313049316, Val Loss=9.350831985473633


Epoch [35/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [35/100]: Train Loss=1.085208535194397, Val Loss=9.368152618408203


Epoch [36/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [36/100]: Train Loss=1.0830397605895996, Val Loss=9.382030487060547


Epoch [37/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [37/100]: Train Loss=1.0481939315795898, Val Loss=9.417468070983887


Epoch [38/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [38/100]: Train Loss=1.0023730993270874, Val Loss=9.434249877929688


Epoch [39/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [39/100]: Train Loss=1.0087852478027344, Val Loss=9.409116744995117


Epoch [40/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [40/100]: Train Loss=1.0425671339035034, Val Loss=9.42041301727295


Epoch [41/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [41/100]: Train Loss=1.0036659240722656, Val Loss=9.404603958129883


Epoch [42/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [42/100]: Train Loss=0.9938126802444458, Val Loss=9.392069816589355


Epoch [43/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [43/100]: Train Loss=0.9800923466682434, Val Loss=9.390249252319336


Epoch [44/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [44/100]: Train Loss=0.9357098340988159, Val Loss=9.411674499511719


Epoch [45/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [45/100]: Train Loss=0.9886789321899414, Val Loss=9.394639015197754


Epoch [46/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [46/100]: Train Loss=0.9270832538604736, Val Loss=9.381953239440918


Epoch [47/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [47/100]: Train Loss=0.9049339890480042, Val Loss=9.357748031616211


Epoch [48/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [48/100]: Train Loss=0.8903659582138062, Val Loss=9.341390609741211


Epoch [49/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [49/100]: Train Loss=0.969981849193573, Val Loss=9.359940528869629


Epoch [50/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [50/100]: Train Loss=0.9261417984962463, Val Loss=9.365127563476562


Epoch [51/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [51/100]: Train Loss=0.9724393486976624, Val Loss=9.381175994873047


Epoch [52/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [52/100]: Train Loss=0.9426606893539429, Val Loss=9.377216339111328


Epoch [53/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [53/100]: Train Loss=0.8492224812507629, Val Loss=9.40155029296875


Epoch [54/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [54/100]: Train Loss=0.830116868019104, Val Loss=9.413568496704102


Epoch [55/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [55/100]: Train Loss=0.8508497476577759, Val Loss=9.419610977172852


Epoch [56/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [56/100]: Train Loss=0.8577970862388611, Val Loss=9.437518119812012


Epoch [57/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [57/100]: Train Loss=0.8737221956253052, Val Loss=9.426719665527344


Epoch [58/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [58/100]: Train Loss=0.909874677658081, Val Loss=9.411642074584961


Epoch [59/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [59/100]: Train Loss=0.8626623153686523, Val Loss=9.424388885498047


Epoch [60/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [60/100]: Train Loss=0.809068500995636, Val Loss=9.414335250854492


Epoch [61/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [61/100]: Train Loss=0.7868480682373047, Val Loss=9.415413856506348


Epoch [62/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [62/100]: Train Loss=0.7956171631813049, Val Loss=9.413431167602539


Epoch [63/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [63/100]: Train Loss=0.9016607403755188, Val Loss=9.42173957824707


Epoch [64/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [64/100]: Train Loss=0.8137131929397583, Val Loss=9.436012268066406


Epoch [65/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [65/100]: Train Loss=0.764212965965271, Val Loss=9.41010856628418


Epoch [66/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [66/100]: Train Loss=0.8128156661987305, Val Loss=9.407011032104492


Epoch [67/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [67/100]: Train Loss=0.8323600888252258, Val Loss=9.413743019104004


Epoch [68/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [68/100]: Train Loss=0.8027945756912231, Val Loss=9.41811752319336


Epoch [69/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [69/100]: Train Loss=0.8169479370117188, Val Loss=9.417243957519531


Epoch [70/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [70/100]: Train Loss=0.8198789954185486, Val Loss=9.41881275177002


Epoch [71/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [71/100]: Train Loss=0.8006020784378052, Val Loss=9.415396690368652


Epoch [72/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [72/100]: Train Loss=0.773610532283783, Val Loss=9.396245956420898


Epoch [73/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [73/100]: Train Loss=0.7916488647460938, Val Loss=9.378190994262695


Epoch [74/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [74/100]: Train Loss=0.8011972308158875, Val Loss=9.376564025878906


Epoch [75/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [75/100]: Train Loss=0.7652885913848877, Val Loss=9.399459838867188


Epoch [76/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [76/100]: Train Loss=0.6797167062759399, Val Loss=9.40687084197998


Epoch [77/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [77/100]: Train Loss=0.7334839105606079, Val Loss=9.4034423828125


Epoch [78/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [78/100]: Train Loss=0.7391725182533264, Val Loss=9.433501243591309


Epoch [79/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [79/100]: Train Loss=0.7319543957710266, Val Loss=9.38755989074707


Epoch [80/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [80/100]: Train Loss=0.7145847082138062, Val Loss=9.383210182189941


Epoch [81/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [81/100]: Train Loss=0.7020145654678345, Val Loss=9.350354194641113


Epoch [82/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [82/100]: Train Loss=0.7188758254051208, Val Loss=9.363938331604004


Epoch [83/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [83/100]: Train Loss=0.747251033782959, Val Loss=9.35501766204834


Epoch [84/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [84/100]: Train Loss=0.7250421047210693, Val Loss=9.323434829711914


Epoch [85/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [85/100]: Train Loss=0.6601014137268066, Val Loss=9.271267890930176


Epoch [86/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [86/100]: Train Loss=0.7038254141807556, Val Loss=9.277257919311523


Epoch [87/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [87/100]: Train Loss=0.6837669610977173, Val Loss=9.281061172485352


Epoch [88/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [88/100]: Train Loss=0.6423935890197754, Val Loss=9.303253173828125


Epoch [89/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [89/100]: Train Loss=0.6990892887115479, Val Loss=9.317045211791992


Epoch [90/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [90/100]: Train Loss=0.6805495619773865, Val Loss=9.367020606994629


Epoch [91/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [91/100]: Train Loss=0.6464428901672363, Val Loss=9.366422653198242


Epoch [92/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [92/100]: Train Loss=0.7149147391319275, Val Loss=9.34559440612793


Epoch [93/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [93/100]: Train Loss=0.7269088625907898, Val Loss=9.356246948242188


Epoch [94/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [94/100]: Train Loss=0.6580234169960022, Val Loss=9.362043380737305


Epoch [95/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [95/100]: Train Loss=0.7444828748703003, Val Loss=9.371784210205078


Epoch [96/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [96/100]: Train Loss=0.6832801103591919, Val Loss=9.384918212890625


Epoch [97/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [97/100]: Train Loss=0.690313458442688, Val Loss=9.36836051940918


Epoch [98/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [98/100]: Train Loss=0.6955564618110657, Val Loss=9.373592376708984


Epoch [99/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [99/100]: Train Loss=0.6922789812088013, Val Loss=9.37344741821289


Epoch [100/100]: lr=0.001:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [100/100]: Train Loss=0.6350679397583008, Val Loss=9.38142204284668


In [None]:
plt.plot(range(EPOCHS), train_losses)
plt.plot(range(EPOCHS), val_losses)