In [1]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pytorch_ssim_changed as pytorch_ssim
from pytorch_msssim import ssim, ms_ssim, SSIM, MS_SSIM
import perceptual_loss
import cv2

from vfi.vfi_new import VFIModel

In [2]:
class Vimeo90kDataset(Dataset):
    def __init__(self, root, mode='train'):
        assert mode in ['train', 'test'], "Invalid mode, it must be either 'train' or 'test'."
        self.mode = mode
        self.root = root
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])
        
        # Load the list of sequences for the specified mode
        if self.mode == 'train':
            listfile = os.path.join(self.root, 'sep_trainlist.txt')
        else:
            listfile = os.path.join(self.root, 'sep_testlist.txt')
        with open(listfile) as f:
            self.sequences = [line.strip() for line in f.readlines()]
        
    def __getitem__(self, index):
        # Load the seven frames of the sequence
        seq_dir = os.path.join(self.root, 'sequence', self.sequences[index])
        frames = [cv2.imread(os.path.join(seq_dir, f'im{i}.png')) for i in range(1, 8)]

        # Apply transforms to each frame
        frames = [self.transforms(frame) for frame in frames]
        
        # Create input and target pairs
        inputs, targets = [], []
        for i in range(0, 5):
            inputs.append(torch.cat([frames[i], frames[i+2]], dim=0))
            targets.append(frames[i+1])
        
        # # Apply transforms
        # inputs = [self.transforms(input) for input in inputs]
        # targets = [self.transforms(target) for target in targets]
        
        # Return input and target pairs as a dictionary
        return {'inputs': inputs, 'targets': targets}
        
    def __len__(self):
        return len(self.sequences)

In [3]:
root_dir = "E:/Engineering/Capstone-Project/Datasets/vimeo_90k"

In [4]:
# Split the dataset into train and test sets
trainset = Vimeo90kDataset(root_dir, mode='train')
testset = Vimeo90kDataset(root_dir, mode='test')

In [5]:
# DataLoaders 
train_dataloader = DataLoader(trainset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(testset, batch_size=1, shuffle=True)

In [6]:
# Define training parameters
batch_size = 16
num_epochs = 500
lr = 0.001

In [7]:
# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VFIModel().to(device)
# criterion = pytorch_ssim.SSIM(window_size=11)
criterion = SSIM(win_size=11, win_sigma=1.5, data_range=1, size_average=True, channel=1)
# criterion = perceptual_loss.PerceptualLoss(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
device

device(type='cuda')

In [9]:
params = list(model.parameters())
num_params = sum(p.numel() for p in params)
print(f"Number of parameters in the model: {num_params}")


Number of parameters in the model: 10778644


In [10]:
model

VFIModel(
  (optical_flow): FlowNet(
    (netFeatures): Features(
      (netOne): Sequential(
        (0): Conv2d(3, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (1): LeakyReLU(negative_slope=0.1)
      )
      (netTwo): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (1): LeakyReLU(negative_slope=0.1)
        (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): LeakyReLU(negative_slope=0.1)
        (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): LeakyReLU(negative_slope=0.1)
      )
      (netThr): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (1): LeakyReLU(negative_slope=0.1)
        (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): LeakyReLU(negative_slope=0.1)
      )
      (netFou): Sequential(
        (0): Conv2d(64, 96, kernel_size=(3, 3), stride=(2, 2), padd

In [None]:
if os.path.exists('model_weights/vfi/vfi-last_epoch_weights.pt'):
    model.load_state_dict(torch.load('vfi-last_epoch_weights.pt'))

In [None]:
def write_epoch_step_no(epoch: int, step: int):
    path = "model_weights/vfi/vfi_epoch_step.bin"
    with open(path, "w") as f:
        f.write(f"Epoch: {epoch}, Step: {step}")

def read_epoch_step_no():
    path = "model_weights/vfi/vfi_epoch_step.bin"
    with open(path, "r") as f:
        content = f.read()
    epoch, step = map(int, [x.split(': ')[1] for x in content.split(',')])
    return epoch, step

In [12]:
torch.autograd.set_detect_anomaly(True)

if os.path.exists('model_weights/vfi/vfi-last_step_weights.pt'):
    model.load_state_dict(torch.load('model_weights/vfi/vfi-last_step_weights.pt'))
    print("Model loaded successfully..!!")

epoch_done, step_done = read_epoch_step_no()

# Training loop
num_epochs = 100
start_time = time.time()
for epoch in range(num_epochs):
    print(f'training... epoch: {epoch}')

    if epoch < epoch_done:
        continue
    
    model.train()
    model.optical_flow.eval()
    running_loss = 0.0

    # print(f'len(train_dataloader): {len(train_dataloader)}')
    total_steps = len(train_dataloader)
    epoch_start_time = time.time()

    for i, data in enumerate(train_dataloader):
        if i < step_done:
            continue
        # print(data)
        inputs, targets = data['inputs'], data['targets']
        # inputs, targets = inputs, targets = [input.to(device) for input in inputs], [target.to(device) for target in targets]
        targets = [Variable(target.to(device),  requires_grad=False) for target in targets]

        optimizer.zero_grad()

        # Forward pass
        outputs = [model(input.to(device)) for input in inputs]
        loss = torch.mean(torch.stack([1-criterion(output, target) for output, target in zip(outputs, targets)]))

        # Backward pass
        loss.backward()
        optimizer.step()

        ssim_value = torch.mean(torch.stack([ssim(output, target).item() for output, target in zip(outputs, targets)]))

        running_loss += loss.item()

        # Calculate the elapsed time and remaining time
        elapsed_time = time.time() - start_time
        remaining_time = (elapsed_time / (i+1)) * (total_steps - i - 1)

        torch.save(model.state_dict(), f'model_weights/vfi/vfi-last_step_weights.pt')
        write_epoch_step_no(epoch, i+1)
        # Print the number of steps, elapsed time, and remaining time
        print(f"Step {i+1}/{total_steps}, Loss: {loss.item()}, ssim_value: {ssim_value.item()}, Elapsed time: {elapsed_time:.2f}s, Remaining time: {remaining_time:.2f}s")

    # Print the average loss for this epoch
    epoch_end_time = time.time()
    epoch_elapsed_time = epoch_end_time - epoch_start_time
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_dataloader)}, Elapsed time: {epoch_elapsed_time:.2f}s")

    torch.save(model.state_dict(), f'model_weights/vfi/vfi-last_epoch_weights.pt')

    # Evaluate the model on the test set
    if (epoch + 1) % 10 == 0:
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for i, data in enumerate(test_dataloader):
                inputs, targets = data['inputs'], data['targets']
                inputs, targets = [input.to(device) for input in inputs], [target.to(device) for target in targets]
                # inputs, targets = inputs.to(device), targets.to(device)

                outputs = [model(input) for input in inputs]
                # loss = sum([criterion(output, target) for output, target in zip(outputs, targets)]) / len(outputs)
                loss = torch.mean(torch.stack([criterion(output, target.to(device)) for output, target in zip(outputs, targets)]))

                ssim_value = torch.mean(torch.stack([ssim(output, target).item() for output, target in zip(outputs, targets)]))

                test_loss += loss.item()

                # Print the number of steps
                print(f"Step {i+1}/{total_steps}, Loss: {loss.item()}, ssim_value: {ssim_value.item()}")

        print(f"Test Loss: {test_loss / len(test_dataloader)}")
        torch.save(model.state_dict(), f'model_weights/vfi/vfi-epoch_{epoch + 1}_weights.pt')


training... epoch: 0
frame1.size(): torch.Size([1, 3, 256, 448]), frame2.size(): torch.Size([1, 3, 256, 448])
intWidth: 448, intHeight: 256
frame1.size(): torch.Size([1, 3, 256, 448]), frame2.size(): torch.Size([1, 3, 256, 448])
intWidth: 448, intHeight: 256


OutOfMemoryError: CUDA out of memory. Tried to allocate 112.00 MiB (GPU 0; 4.00 GiB total capacity; 3.32 GiB already allocated; 0 bytes free; 3.36 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF