In [1]:
from dataset import YouCookII
from dataset import YouCookIICollate
from torch.utils.data import DataLoader
from loss import loss_RA_MIL
from transformers import get_linear_schedule_with_warmup
from model import Model

import numpy as np
import torch
import matplotlib.pyplot as plt

def train(model, num_actions, batch_size, epochs=25, lr=0.001, y=0.5, MAX_DETECTIONS=20,
          dataset_root="/h/sagar/ece496-capstone/datasets/ycii"):
    """Train ``model`` on YouCookII and record the mean loss of every epoch.

    The dataset is split 2/3 train and 1/3 validation with
    ``torch.utils.data.random_split``. Each epoch runs one pass over the
    training split with Adam, steps the linear warmup/decay schedule once,
    and then measures the validation loss with ``get_validation_loss``.
    Both loss curves are plotted when training finishes.

    Args:
        model: Module to optimise. It is called as
            ``model(batch_size, num_actions + 1, steps, features, bboxes,
            entity_counts, entities)`` and must return ten values, the first
            five being ``loss_E, loss_V, loss_R, VG_dist1, VG_dist2``.
        num_actions: Forwarded to ``YouCookII``; the model receives
            ``num_actions + 1``.
        batch_size: Batch size for both data loaders.
        epochs: Number of passes over the training split.
        lr: Learning rate handed to Adam.
        y: First argument of ``loss_RA_MIL``.
        MAX_DETECTIONS: Forwarded to ``YouCookIICollate``.
        dataset_root: Directory handed to ``YouCookII``.

    Returns:
        ``(train_loss, valid_loss)``: two NumPy arrays of length ``epochs``
        holding the mean per-batch loss of each epoch.

    Raises:
        ValueError: If the training split is empty.
    """
    dataset = YouCookII(num_actions, dataset_root)
    train_size = int(len(dataset) * (2/3))
    valid_size = int(len(dataset) - train_size)

    if train_size == 0:
        # Fail early with a clear message instead of dividing by zero after the first epoch.
        raise ValueError(
            "training split is empty: dataset has {} item(s)".format(len(dataset))
        )

    train_dataset, valid_dataset = torch.utils.data.random_split(dataset, [train_size, valid_size])

    collate = YouCookIICollate(MAX_DETECTIONS=MAX_DETECTIONS)

    # The collate object is bound to `collate`; no `collate_fn` name exists in this notebook.
    # shuffle=False: batch order stays fixed across epochs (random_split permutes once).
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The schedule is stepped once per epoch, so "steps" here are epochs: 20% warmup, then decay.
    # NOTE(review): with warmup the first epoch presumably runs at lr=0 — confirm this is intended.
    scheduler = get_linear_schedule_with_warmup(optimizer, int(0.2*epochs), epochs)

    train_loss = np.zeros(epochs)
    valid_loss = np.zeros(epochs)

    model.train()

    for epoch in range(epochs):
        epoch_loss = 0.0
        num_batches = 0
        for data in train_dataloader:
            _, bboxes_tensor, features_tensor, steps_list, entity_list, entity_count_list, _, _ = data
            # Use the real size of this batch without overwriting the `batch_size` argument.
            current_batch_size = len(data[0])

            # Zero out any gradients.
            optimizer.zero_grad()

            # Run inference (forward pass).
            loss_E, loss_V, loss_R, VG_dist1, VG_dist2, _, _, _, _, _ = model(
                current_batch_size, num_actions + 1, steps_list, features_tensor,
                bboxes_tensor, entity_count_list, entity_list
            )

            # Loss from alignment.
            loss_ = loss_RA_MIL(y, loss_R, loss_E, loss_V, VG_dist1, VG_dist2)

            # Backpropagation (backward pass).
            loss_.backward()

            # Update parameters.
            optimizer.step()

            # Accumulate a plain float: summing the tensor itself would keep every
            # batch's autograd graph alive until the end of the epoch.
            epoch_loss += loss_.item()
            num_batches += 1

        # Learning rate schedule: update after each epoch.
        scheduler.step()

        # Save the mean loss of this epoch.
        train_loss[epoch] = epoch_loss / num_batches
        # Validate the model being trained, not whatever `model` happens to be a notebook global.
        valid_loss[epoch] = get_validation_loss(num_actions, y, valid_dataloader, model=model)

        # After the epoch completes.
        print("Epoch {} - Train Loss: {}, Validation Loss: {}".format(epoch + 1, train_loss[epoch], valid_loss[epoch]))

    plt.plot(train_loss, label='train loss')
    plt.plot(valid_loss, label='valid loss')
    plt.xlabel('epoch')
    plt.ylabel('mean loss')
    plt.legend()
    plt.show()

    return train_loss, valid_loss

def get_validation_loss(num_actions, y, valid_dataloader, model=None):
    """Return the mean ``loss_RA_MIL`` over ``valid_dataloader`` as a float.

    The forward passes run under ``torch.no_grad()`` with the model switched
    to evaluation mode; the model's previous training flag is restored before
    returning, even if a batch raises.

    Args:
        num_actions: The model receives ``num_actions + 1``.
        y: First argument of ``loss_RA_MIL``.
        valid_dataloader: Iterable of 8-tuples as produced by the collate
            function; ``len(batch[0])`` is used as the batch size.
        model: Module to evaluate. When omitted, the module-level ``model``
            is used so that existing three-argument calls keep working.

    Returns:
        Mean per-batch loss as a Python float, or ``nan`` when the loader
        yields no batches.

    Raises:
        NameError: If ``model`` is omitted and no global ``model`` exists.
    """
    if model is None:
        # Backward compatibility: earlier versions always read the notebook-global `model`.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("name 'model' is not defined; pass model=... explicitly") from None

    epoch_loss = 0.0
    num_batches = 0

    # Remember the current mode so a surrounding training loop is left undisturbed.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data in valid_dataloader:
                _, bboxes_tensor, features_tensor, steps_list, entity_list, entity_count_list, _, _ = data
                current_batch_size = len(data[0])

                # Run inference (forward pass).
                loss_E, loss_V, loss_R, VG_dist1, VG_dist2, _, _, _, _, _ = model(
                    current_batch_size, num_actions + 1, steps_list, features_tensor,
                    bboxes_tensor, entity_count_list, entity_list
                )

                # Loss from alignment.
                loss_ = loss_RA_MIL(y, loss_R, loss_E, loss_V, VG_dist1, VG_dist2)

                epoch_loss += loss_.item()
                num_batches += 1
    finally:
        model.train(was_training)

    if num_batches == 0:
        # The mean of zero batches is undefined; NaN keeps the caller's loss array plottable.
        return float("nan")

    return epoch_loss / num_batches

PyTorch version 1.6.0 available.


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# NOTE(review): the model is built with MAX_DETECTIONS=5 while train() defaults its
# collate function to MAX_DETECTIONS=20 — confirm the two are meant to differ.
model = Model(device, MAX_DETECTIONS=5)

In [3]:
# Evaluate the model as constructed above, before any training in this notebook.
# The per-action IoU lines and the summary printed below come from this call.
from eval_fi import eval_all_dataset
eval_all_dataset(model)

--------------------------------------------------
Action 1: cut the cabbage into small pieces
Chosen Frame IoU: 0.05500752478837967
Best IoU possible = 0.8769948482513428
Random Candidate IoU = 0.039505381137132645
This entity has no ground truth bounding box
--------------------------------------------------
Action 2: Add carawat seeds, sea salt
Chosen Frame IoU: 0.0
Best IoU possible = 0.11877390742301941
Random Candidate IoU = 0.015438527800142765
This entity has no ground truth bounding box
--------------------------------------------------
Action 3: mix them evenly in the cabbage
Chosen Frame IoU: 0.040348682552576065
Best IoU possible = 0.22755128145217896
Random Candidate IoU = 0.042893245816230774
Chosen Frame IoU: 0.3224583497472767
Best IoU possible = 0.8768255710601807
Random Candidate IoU = 0.0002340185601497069
--------------------------------------------------
Action 4: Add some leftover brine in a jar
Chosen Frame IoU: 0.12356484681367874
Best IoU possible = 0.233144417

Best IoU possible = 0.14128494262695312
Random Candidate IoU = 0.051539858122448996
--------------------------------------------------
Action 2: peel the apple and kiwi
Chosen Frame IoU: 0.0
Best IoU possible = 0.8488753706881534
Random Candidate IoU = 0.0
This entity has no ground truth bounding box
--------------------------------------------------
Action 3: cut the apple and kiwi
Chosen Frame IoU: 0.0
Best IoU possible = 0.1369498074054718
Random Candidate IoU = 0.08123064041137695
This entity has no ground truth bounding box
--------------------------------------------------
Action 4: cut the onion and garlic
Chosen Frame IoU: 0.033061842803699705
Best IoU possible = 0.03080332651734352
Random Candidate IoU = 0.0
Chosen Frame IoU: 0.0
Best IoU possible = 0.30368152981235563
Random Candidate IoU = 0.12934327125549316
--------------------------------------------------
Action 5: Crush the chopped ingredients in a food processor
Chosen Frame IoU: 0.24279643595218658
Best IoU possible =

--------------------------------------------------
Action 1: Add the meat to a bowl
Chosen Frame IoU: 0.4843759834766388
Best IoU possible = 0.617917001247406
Random Candidate IoU = 0.30047738552093506
Chosen Frame IoU: 0.6951688528060913
Best IoU possible = 0.7723944783210754
Random Candidate IoU = 0.37654924392700195
--------------------------------------------------
Action 2: Add garlic salt, sugar, sesame oil, and sauce to the meat
Chosen Frame IoU: 0.19931736588478088
Best IoU possible = 0.32661930508394493
Random Candidate IoU = 0.2293870747089386
Chosen Frame IoU: 0.29671552777290344
Best IoU possible = 0.639303697863656
Random Candidate IoU = 0.5392189025878906
This entity has no ground truth bounding box
This entity has no ground truth bounding box
This entity has no ground truth bounding box
--------------------------------------------------
Action 3: Add cabbage, green onion, and carrots to the bowl
Chosen Frame IoU: 0.26623539198725343
Best IoU possible = 0.2914719422909032

--------------------------------------------------
Action 1: heat 1 cup chicken stock and soy sauce in a pot
Chosen Frame IoU: 0.0
Best IoU possible = 0.10553665459156036
Random Candidate IoU = 0.0
Chosen Frame IoU: 0.3720382750034332
Best IoU possible = 0.6325271129608154
Random Candidate IoU = 0.3299540877342224
This entity has no ground truth bounding box
--------------------------------------------------
Action 2: Add a little black pepper and white vinegar
Chosen Frame IoU: 0.5249471664428711
Best IoU possible = 0.6679965257644653
Random Candidate IoU = 0.10002966225147247
Chosen Frame IoU: 0.12660154700279236
Best IoU possible = 0.6945008039474487
Random Candidate IoU = 0.4805642068386078
--------------------------------------------------
Action 3: Add a pinch of ajinomoto and sugar
Chosen Frame IoU: 0.0
Best IoU possible = 0.4904511168414796
Random Candidate IoU = 0.4629996418952942
Chosen Frame IoU: 0.496507853269577
Best IoU possible = 0.6811716556549072
Random Candidate IoU =

--------------------------------------------------
Action 1: Take two cups of rice
Chosen Frame IoU: 0.0
Best IoU possible = 0.3935592472553253
Random Candidate IoU = 0.3935592472553253
--------------------------------------------------
Action 2: add 3 lids of rice vinegar
Chosen Frame IoU: 0.0
Best IoU possible = 0.38129182907921105
Random Candidate IoU = 0.004098499193787575
--------------------------------------------------
Action 3: mix it
Chosen Frame IoU: 0.09494048357009888
Best IoU possible = 0.2826422154903412
Random Candidate IoU = 0.0699443593621254
--------------------------------------------------
Action 4: Take shredded crab meat
Chosen Frame IoU: 0.014401362277567387
Best IoU possible = 0.7791619896888733
Random Candidate IoU = 0.1903662085533142
--------------------------------------------------
Action 5: add some mayonnaise
Chosen Frame IoU: 0.0
Best IoU possible = 0.46554169058799744
Random Candidate IoU = 0.2260546066769108
-------------------------------------------

--------------------------------------------------
Action 1: place the egg noodles into a bowl
Chosen Frame IoU: 0.527682900428772
Best IoU possible = 0.700899716634964
Random Candidate IoU = 0.25144797563552856
Chosen Frame IoU: 0.0
Best IoU possible = 0.777563750743866
Random Candidate IoU = 0.6560807228088379
--------------------------------------------------
Action 2: Add the noodles to a pot of boiling water
Chosen Frame IoU: 0.517630398273468
Best IoU possible = 0.5548335313796997
Random Candidate IoU = 0.4228383627293191
Chosen Frame IoU: 0.5183466672897339
Best IoU possible = 0.787959098815918
Random Candidate IoU = 0.7014890909194946
--------------------------------------------------
Action 3: Drain the noodles in a colander
Chosen Frame IoU: 0.0
Best IoU possible = 0.43088865280151367
Random Candidate IoU = 0.22184716165065765
Chosen Frame IoU: 0.6373326182365417
Best IoU possible = 0.6787436008453369
Random Candidate IoU = 0.11780799925327301
--------------------------------

Chosen Frame IoU: 0.03697147945181
Best IoU possible = 0.22955995404317325
Random Candidate IoU = 0.19922979161039586
Mean Upper Bound IoU: 0.2951110601425171, Mean Random IoU: 0.07656492292881012, Mean Model IoU: 0.07522417604923248
Top-1 acc@0.5:
Proposal Upper Bound: 0.22727272727272727, Random: 0.045454545454545456, Model: 0.09090909090909091
--------------------------------------------------
Action 1: Whisk one large egg
Chosen Frame IoU: 0.010718470439314842
Best IoU possible = 0.02439497411251068
Random Candidate IoU = 0.02439497411251068
--------------------------------------------------
Action 2: add sour cream
Chosen Frame IoU: 0.016536258263210278
Best IoU possible = 0.018493665533439743
Random Candidate IoU = 0.0
--------------------------------------------------
Action 3: add little water
Chosen Frame IoU: 0.39421212673187256
Best IoU possible = 0.5541272759437561
Random Candidate IoU = 0.05412973463535309
--------------------------------------------------
Action 4: contin

--------------------------------------------------
Action 1: spread rice onto the seaweed
Chosen Frame IoU: 0.1831646820340744
Best IoU possible = 0.1831646820340744
Random Candidate IoU = 0.0
Chosen Frame IoU: 0.0
Best IoU possible = 0.2641597390174866
Random Candidate IoU = 0.0
--------------------------------------------------
Action 2: sprinkle sesame seeds on top of the rice
Chosen Frame IoU: 0.08850531339658955
Best IoU possible = 0.32371212936642046
Random Candidate IoU = 0.22745106367394577
Chosen Frame IoU: 0.07898589968681335
Best IoU possible = 0.19951459015631068
Random Candidate IoU = 0.0
--------------------------------------------------
Action 3: Flip the sheet over
Chosen Frame IoU: 0.0
Best IoU possible = 0.0486251600086689
Random Candidate IoU = 0.0
This entity has no ground truth bounding box
--------------------------------------------------
Action 4: place tuna in a line on the sheet
Chosen Frame IoU: 0.11953248828649521
Best IoU possible = 0.016689369454979897
Ran

Best IoU possible = 0.1259375661611557
Random Candidate IoU = 0.00808850210160017
--------------------------------------------------
Action 8: Remove the shrimp from the oil
Chosen Frame IoU: 0.5022692680358887
Best IoU possible = 0.6240008473396301
Random Candidate IoU = 0.5579131245613098
Chosen Frame IoU: 0.32850298285484314
Best IoU possible = 0.7623727917671204
Random Candidate IoU = 0.0
Mean Upper Bound IoU: 0.3029884696006775, Mean Random IoU: 0.06472983956336975, Mean Model IoU: 0.1332654505968094
Top-1 acc@0.5:
Proposal Upper Bound: 0.3333333333333333, Random: 0.08333333333333333, Model: 0.16666666666666666
--------------------------------------------------
EVALUATION SUMMARY
Number of videos: 22
Mean IoU:
	Proposal Upper Bound: 0.42552584409713745
	Random: 0.15491734445095062
	Model: 0.14277729392051697
Top-1 accuracy@0.5:
	Proposal Upper Bound: 42.2%
	Random: 6.4%
	Model: 8.9%
--------------------------------------------------


In [None]:
# Keyword arguments make explicit which hyper-parameter each value sets.
train_loss, valid_loss = train(model, num_actions=8, batch_size=2, epochs=50, lr=1e-3)

In [None]:
# Run the visualizer's inference helper on the FI dataset directory.
from visualizer import inference

# Dataset locations on the lab machine. YCII is defined for convenience but not used in this cell.
YCII = "/h/sagar/ece496-capstone/datasets/ycii"
FI = "/h/sagar/ece496-capstone/datasets/fi"

# NOTE(review): the positional 10 and 1 are presumably num_actions and an index/batch size — confirm against visualizer.inference.
VG, RR = inference(model, 10, 1, FI)

In [None]:
# Saving and loading weights.

# Flip one of these flags by hand to persist or restore the model's parameters.
SAVE = False
LOAD = False

# Single checkpoint location shared by both branches, so save and load cannot drift apart.
WEIGHTS_PATH = "/h/sagar/ece496-capstone/weights/weights-epochs=50,bs=2,lr=1e-3,a=8"

if SAVE:
    torch.save(model.state_dict(), WEIGHTS_PATH)

if LOAD:
    # map_location remaps tensors saved on a GPU onto the device chosen for this
    # session, so the checkpoint also loads on a CPU-only machine.
    model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))

In [None]:
# Reload modules.

import importlib
import visualizer

# Re-execute visualizer.py so that edits made on disk take effect in this kernel.
visualizer = importlib.reload(visualizer)

# reload() does not rebind names that were imported with `from visualizer import ...`,
# so refresh `inference` explicitly; otherwise the notebook keeps calling the old function.
from visualizer import inference