In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import imageio.v3 as iio
from PIL import Image
import numpy as np
import torchmetrics
import config
import models as m
import utils as u
import gc
from segmentation_models_pytorch.losses import JaccardLoss


  from .autonotebook import tqdm as notebook_tqdm


## Copy hidden file into your scratch folder

- download the file from https://drive.google.com/drive/folders/1geJERvh-wODANvEJlnh_nB2QPOwU-cAG locally
- copy the file from your local terminal to the project folder within your scratch directory
    `scp hidden_set_for_leaderboard_1.zip <your netid>@greene.hpc.nyu.edu:/scratch/<your netid>/video_prediction_project`
- unzip the file 
    `unzip hidden_set_for_leaderboard_1.zip`

In [2]:
# Start from a clean slate: ask PyTorch to release its cached CUDA memory, then
# force a garbage-collection pass. The value of gc.collect() is the last
# expression in the cell, so it is what the notebook displays as the output.
torch.cuda.empty_cache()
gc.collect()

0

In [3]:
## Configuration used for pretrain and finetune
# These values are passed to the model constructors below, so they must match
# the architecture the checkpoints were saved from.
image_size = (160, 240)
patch_size = config.pretrain_config['patch_size']
embed_dim = config.pretrain_config['embed_dim']
expander_out = config.pretrain_config['expander_out']
kernel_size = config.finetune_config['kernel_size']
padding = config.finetune_config['padding']
stride = config.finetune_config['stride']
batch_size = config.finetune_config['batch_size']
num_epochs = config.finetune_config['num_epochs']
lr = config.finetune_config['lr']

## Load the models
# use cpu
device = "cpu"

# map_location remaps every tensor in the checkpoint onto `device`. Without it,
# a checkpoint saved from a GPU run cannot be deserialized on a CPU-only host.
VICReg_model_path = 'VICReg_pretrained_1682959065.pth'
VICReg_model = m.VICReg(image_size, patch_size, embed_dim, expander_out=expander_out).to(device)
VICReg_model.load_state_dict(torch.load(VICReg_model_path, map_location=device))
VICReg_model.eval()

final_model_path = 'video_predictor_finetuned_best_val_1682959078.pth'
video_prediction_model = m.VideoPredictor(VICReg_model, kernel_size, padding, stride).to(device)
video_prediction_model.load_state_dict(torch.load(final_model_path, map_location=device))
video_prediction_model.eval()

# Drop the notebook-level name for the VICReg model and reclaim memory:
# collect garbage first, then release PyTorch's cached CUDA memory.
del VICReg_model
gc.collect()
torch.cuda.empty_cache()

# Metric (Jaccard index) and loss (Jaccard loss) over the 49 segmentation classes.
jaccard = torchmetrics.JaccardIndex(task="multiclass", num_classes=49)
criterion = JaccardLoss(mode='multiclass', classes=49)

In [4]:
## Recompute Jaccard loss on train and validation set.
train_folder = "Dataset_Student/train/"
# Batch size = number of entries in the train folder, i.e. the whole split is
# evaluated as a single batch.
num_train_videos = len(os.listdir(train_folder))

train_dataloader = u.create_finetune_dataloader(train_folder, image_size, batch_size=num_train_videos, train_or_val='train')

# Evaluation only: disable autograd so the forward pass over the full split
# does not build and retain a computation graph.
with torch.no_grad():
    for x_train, y_train in tqdm(train_dataloader):
        y_train_pred = video_prediction_model(x_train)
        del x_train
        # NOTE: despite the label, this value is the Jaccard index metric
        # (higher is better), not a loss.
        print("Jaccard Index loss:", jaccard(y_train_pred.argmax(dim=1), y_train))
        # NOTE(review): the criterion receives softmax probabilities here;
        # JaccardLoss may apply its own softmax by default (from_logits) --
        # confirm this matches how the loss was computed during fine-tuning.
        print("Optimized loss", criterion(y_train_pred.log_softmax(dim=1).exp(), y_train))
        del y_train_pred, y_train

del train_dataloader
gc.collect()


loading train finetuning data from disk...
done


  0%|          | 0/1 [00:00<?, ?it/s]

Jaccard Index loss: tensor(0.0193)
Optimized loss tensor(0.9979, grad_fn=<MeanBackward0>)


100%|██████████| 1/1 [00:35<00:00, 35.71s/it]


0

In [5]:
## Recompute the Jaccard index and Jaccard loss on the validation set.
val_folder = "Dataset_Student/val/"
# Batch size = number of entries in the val folder, i.e. the whole split is
# evaluated as a single batch.
num_val_videos = len(os.listdir(val_folder))

val_dataloader = u.create_finetune_dataloader(val_folder, image_size, batch_size=num_val_videos, train_or_val='val')

# Evaluation only: disable autograd so the forward pass over the full split
# does not build and retain a computation graph.
with torch.no_grad():
    for x_val, y_val in tqdm(val_dataloader):
        y_val_pred = video_prediction_model(x_val)
        del x_val
        # NOTE: despite the label, this value is the Jaccard index metric
        # (higher is better), not a loss.
        print("Jaccard Index loss:", jaccard(y_val_pred.argmax(dim=1), y_val))
        # NOTE(review): the criterion receives softmax probabilities here;
        # JaccardLoss may apply its own softmax by default (from_logits) --
        # confirm this matches how the loss was computed during fine-tuning.
        print("Optimized loss", criterion(y_val_pred.log_softmax(dim=1).exp(), y_val))
        del y_val_pred, y_val

del val_dataloader
gc.collect()

loading val finetuning data from disk...
done


  0%|          | 0/1 [00:00<?, ?it/s]

Jaccard Index loss: tensor(0.0193)
Optimized loss tensor(0.9979, grad_fn=<MeanBackward0>)


100%|██████████| 1/1 [00:32<00:00, 32.13s/it]


0

In [6]:
# Build the dataloader for the hidden leaderboard set. The batch size is set to
# the number of entries in the hidden folder and is printed for reference.
# NOTE: this rebinds `batch_size`, replacing the value read from the fine-tune
# config earlier in the notebook.
hidden_folder = "hidden/"
batch_size = len(os.listdir(hidden_folder))
print(batch_size)
hidden_dataloader = u.create_hidden_dataloader(
    hidden_folder,
    image_size,
    batch_size=batch_size,
    hidden_set='hidden',
)

2000
loading hidden finetuning data from disk...
done


In [7]:
# First pass: count the hidden samples so the output tensor can be preallocated.
len_data = sum(batch.size(0) for batch in hidden_dataloader)
print(len_data)

# One predicted mask (class index per pixel) per hidden sample. torch.zeros
# defaults to a floating-point dtype, so the class indices are stored as floats.
output = torch.zeros(len_data, *image_size)

b_index = 0
# Inference only: disable autograd so no computation graph is kept.
with torch.no_grad():
    for data in tqdm(hidden_dataloader, desc='generating outputs'):
        data = data.to(device)
        y_pred = video_prediction_model(data)
        y_pred = y_pred.argmax(dim=1)
        # Advance by the number of samples actually in this batch, so a short
        # final batch is written to the right rows.
        n_samples = y_pred.size(0)
        output[b_index:b_index + n_samples] = y_pred
        b_index += n_samples

assert b_index == len_data, "number of predictions does not match the hidden set size"
torch.save(output, 'submitted_tensor_team12.pt')


2000


generating outputs: 100%|██████████| 1/1 [00:28<00:00, 28.21s/it]


In [8]:
# Sanity check: list the distinct class indices present in `y_pred`, which still
# holds the predictions for the last batch processed by the inference loop.
y_pred.unique()

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38,
        39, 40, 42, 43, 44, 45, 46, 47, 48])

In [9]:
# Sanity check: list the distinct values in `data`, the last input batch left
# over from the inference loop.
data.unique()

tensor([0.0000, 0.0039, 0.0078, 0.0118, 0.0157, 0.0196, 0.0235, 0.0275, 0.0314,
        0.0353, 0.0392, 0.0431, 0.0471, 0.0510, 0.0549, 0.0588, 0.0627, 0.0667,
        0.0706, 0.0745, 0.0784, 0.0824, 0.0863, 0.0902, 0.0941, 0.0980, 0.1020,
        0.1059, 0.1098, 0.1137, 0.1176, 0.1216, 0.1255, 0.1294, 0.1333, 0.1373,
        0.1412, 0.1451, 0.1490, 0.1529, 0.1569, 0.1608, 0.1647, 0.1686, 0.1725,
        0.1765, 0.1804, 0.1843, 0.1882, 0.1922, 0.1961, 0.2000, 0.2039, 0.2078,
        0.2118, 0.2157, 0.2196, 0.2235, 0.2275, 0.2314, 0.2353, 0.2392, 0.2431,
        0.2471, 0.2510, 0.2549, 0.2588, 0.2627, 0.2667, 0.2706, 0.2745, 0.2784,
        0.2824, 0.2863, 0.2902, 0.2941, 0.2980, 0.3020, 0.3059, 0.3098, 0.3137,
        0.3176, 0.3216, 0.3255, 0.3294, 0.3333, 0.3373, 0.3412, 0.3451, 0.3490,
        0.3529, 0.3569, 0.3608, 0.3647, 0.3686, 0.3725, 0.3765, 0.3804, 0.3843,
        0.3882, 0.3922, 0.3961, 0.4000, 0.4039, 0.4078, 0.4118, 0.4157, 0.4196,
        0.4235, 0.4275, 0.4314, 0.4353, 