In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import imageio.v3 as iio
from PIL import Image
import numpy as np
import torchmetrics
import config
import models as m
import utils as u
import gc
from segmentation_models_pytorch.losses import JaccardLoss


  from .autonotebook import tqdm as notebook_tqdm


## Copy hidden file into your scratch folder

- Download the file from https://drive.google.com/drive/folders/1geJERvh-wODANvEJlnh_nB2QPOwU-cAG to your local machine.
- Copy the file from your local terminal to the project folder within your scratch directory:
    `scp hidden_set_for_leaderboard_1.zip <your netid>@greene.hpc.nyu.edu:/scratch/<your netid>/video_prediction_project`
- Unzip the file:
    `unzip hidden_set_for_leaderboard_1.zip`

In [None]:
# Collect unreachable Python objects first so that any tensors they hold are released,
# then hand the now-unused cached blocks back to the CUDA driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
## Configuration used for pretrain and finetune (values come from config.py)
image_size = (160, 240)
patch_size = config.pretrain_config['patch_size']
embed_dim = config.pretrain_config['embed_dim']
expander_out = config.pretrain_config['expander_out']
kernel_size = config.finetune_config['kernel_size']
padding = config.finetune_config['padding']
stride = config.finetune_config['stride']
batch_size = config.finetune_config['batch_size']
num_epochs = config.finetune_config['num_epochs']
lr = config.finetune_config['lr']

## Load the models
# Inference is pinned to the CPU; swap in the commented line to use a GPU when one is available.
device = "cpu"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The pretrain checkpoint path is set by hand: when pretrain and finetune are run together,
# the model_id stored in config.pretrain_config has not been updated yet.
VICReg_model_path = 'VICReg_pretrained_1682887688.pth'  #config.pretrain_config['model_id']
VICReg_model = m.VICReg(image_size, patch_size, embed_dim, expander_out=expander_out).to(device)
# map_location remaps tensors that were saved from another device (e.g. a GPU) onto `device`,
# so the checkpoint also loads on a machine without CUDA.
VICReg_model.load_state_dict(torch.load(VICReg_model_path, map_location=device))
VICReg_model.eval()
# Report the checkpoint that was actually loaded rather than the (possibly stale) config entry.
print(f'loaded model {VICReg_model_path}')

# final_model_path = 'video_predictor_finetuned_1682558449.pth'
final_model_path = 'video_predictor_finetuned_best_val_1682889093.pth' #Jaccard 0.0192
# final_model_path = 'video_predictor_finetuned_best_val_1682895029.pth' #Jaccard 0.0192
video_prediction_model = m.VideoPredictor(VICReg_model, kernel_size, padding, stride).to(device)
video_prediction_model.load_state_dict(torch.load(final_model_path, map_location=device))
video_prediction_model.eval()

# Drop the notebook-level name for the backbone. The predictor was constructed from this
# object, so whether any memory is actually released depends on m.VideoPredictor -- TODO confirm.
del VICReg_model
gc.collect()
torch.cuda.empty_cache()

# Metric (higher is better) and loss (lower is better) over the 49 segmentation classes.
jaccard = torchmetrics.JaccardIndex(task="multiclass", num_classes=49)
# NOTE(review): segmentation_models_pytorch documents `classes` as a list of class indices
# that contribute to the loss; confirm that passing the integer 49 behaves as intended.
criterion = JaccardLoss(mode='multiclass', classes=49)

In [None]:
## Recompute the Jaccard loss and Jaccard index on the training set.
train_folder="Dataset_Student/train/"

train_dataloader = u.create_finetune_dataloader(train_folder, image_size, batch_size= 1000, train_or_val='train')

# Evaluation only: no_grad stops autograd from recording the forward pass of each large batch.
with torch.no_grad():
    for x_train, y_train in tqdm(train_dataloader):
        # Run the model on its own device, then bring the scores back to the CPU where the
        # targets, the loss and the metric live.
        y_train_logits = video_prediction_model(x_train.to(device)).cpu()
        del x_train
        # JaccardLoss(mode='multiclass') expects per-class scores of shape (N, C, H, W),
        # so it receives the raw model output rather than the argmaxed labels.
        print("Optimized loss", criterion(y_train_logits, y_train))
        # The torchmetrics Jaccard index is computed on hard class labels.
        y_train_pred = y_train_logits.argmax(dim=1)
        del y_train_logits
        print("Jaccard Index loss:", jaccard(y_train_pred, y_train))
        del y_train_pred, y_train

del train_dataloader
gc.collect()


In [None]:
## Recompute the Jaccard loss and Jaccard index on the validation set.
val_folder="Dataset_Student/val/"

val_dataloader = u.create_finetune_dataloader(val_folder, image_size, batch_size= 1000, train_or_val='val')

# Evaluation only: no_grad stops autograd from recording the forward pass of each large batch.
with torch.no_grad():
    for x_val, y_val in tqdm(val_dataloader):
        # Run the model on its own device, then bring the scores back to the CPU where the
        # targets, the loss and the metric live.
        y_val_logits = video_prediction_model(x_val.to(device)).cpu()
        del x_val
        # JaccardLoss(mode='multiclass') expects per-class scores of shape (N, C, H, W),
        # so it receives the raw model output rather than the argmaxed labels.
        print("Optimized loss", criterion(y_val_logits, y_val))
        # The torchmetrics Jaccard index is computed on hard class labels.
        y_val_pred = y_val_logits.argmax(dim=1)
        del y_val_logits
        print("Jaccard Index loss:", jaccard(y_val_pred, y_val))
        del y_val_pred, y_val

del val_dataloader
gc.collect()

In [3]:
# Folder holding the unzipped hidden leaderboard set; every directory entry counts as one video.
hidden_folder = "hidden/"
num_hidden_videos = len(os.listdir(hidden_folder))
print(num_hidden_videos)
# The batch size is set to the number of entries, i.e. the loader is asked for one single batch.
hidden_dataloader = u.create_hidden_dataloader(
    hidden_folder,
    image_size,
    batch_size=num_hidden_videos,
    hidden_set='hidden',
)

2000


In [None]:
## Predict a mask for every hidden video and save all predictions as one tensor.
# Predictions are gathered batch by batch in a single pass over the dataloader, so the
# result does not depend on any assumed batch size and the data is not loaded twice.
batch_predictions = []
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for data in tqdm(hidden_dataloader, desc='generating outputs'):
        data = data.to(device)
        # argmax over dim 1 turns the per-class scores into one class label per pixel
        y_pred = video_prediction_model(data).argmax(dim=1)
        batch_predictions.append(y_pred.cpu())

if batch_predictions:
    # Stored as float, matching the torch.zeros buffer this cell previously filled.
    output = torch.cat(batch_predictions, dim=0).float()
else:
    # Empty dataloader: keep the original result of an empty (0, 160, 240) tensor.
    output = torch.zeros(0, 160, 240)

len_data = output.size(0)
print(len_data)
torch.save(output, 'submitted_tensor.pt')
