# Task 3: Helper notebook for loading the data and saving the predictions

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
folder = "drive/MyDrive/AML_Pr3/"

In [3]:
import pickle
import gzip
import numpy as np
import os

### Helper functions

In [4]:
def load_zipped_pickle(filename):
    with gzip.open(filename, 'rb') as f:
        loaded_object = pickle.load(f)
        return loaded_object

In [5]:
def save_zipped_pickle(obj, filename):
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f, 2)

### Load data, make predictions and save prediction in correct format

In [6]:
# load data
train_data = load_zipped_pickle(folder + "train.pkl")
test_data = load_zipped_pickle(folder + "test.pkl")
samples = load_zipped_pickle(folder + "sample.pkl")

In [31]:
for t in train_data:
  print(t.keys())
  print(t['video'])
  break

dict_keys(['name', 'video', 'box', 'label', 'frames', 'dataset'])
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 1 ... 3 0 0]
  [0 0 2 ... 0 2 1]
  [0 0 1 ... 0 0 1]
  ...
  [0 0 1 ... 1 2 0]
  [1 0 0 ... 1 2 0]
  [1 0 0 ... 2 2 0]]

 [[0 0 0 ... 3 0 0]
  [1 0 0 ... 0 3 2]
  [0 0 0 ... 2 1 0]
  ...
  [1 0 1 ... 0 1 0]
  [0 2 0 ... 0 0 0]
  [0 1 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 3 2 0]
  [3 2 0 ... 2 3 0]
  ...
  [0 0 1 ... 4 1 1]
  [0 7 0 ... 1 1 1]
  [0 3 0 ... 0 0 1]]]


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the generator and discriminator networks
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = nn.functional.relu(x)
        x = self.conv4(x)
        x = nn.functional.relu(x)
        x = self.conv5(x)
        x = nn.functional.sigmoid(x)
        return x

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(2, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = nn.functional.relu(x)
        x = self.conv4(x)
        x = nn.functional.relu(x)
        x = self.conv5(x)
        x = nn.functional.sigmoid(x)
        return x

In [42]:
def wasserstein_loss(y_true, y_pred):
 return mean(y_true) * mean(y_pred)
#from torch.nn.functional import wasserstein_distance
from torch.nn import SoftMarginLoss, SmoothL1Loss

In [43]:
# Set up the model, loss function, and optimizers
generator = Generator()
discriminator = Discriminator()
bce_loss = nn.BCELoss()
wasserstein_loss = SoftMarginLoss() #wasserstein_loss

d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))

num_epochs = 10
batch_size = 1000
# Train the model
for epoch in range(num_epochs):
    for t in train_data:
        images = torch.tensor(t['video'])
        labels = torch.tensor(t['label'])
        # Resize images and labels to match the input size of the generator
        img_size = t['video'].shape
        images = images.resize_(batch_size, 3, img_size[0], img_size[1])
        labels = labels.resize_(batch_size, 1, img_size[0], img_size[1])

        # Create a "fake" segmentation map using the generator
        fake_labels = generator(images)

        # Concatenate the real and fake images and labels
        real_fake_images = torch.cat((images, fake_labels), 1)
        real_fake_labels = torch.cat((labels, labels), 1)

        # Train the discriminator
        d_optimizer.zero_grad()
        d_output = discriminator(real_fake_images, real_fake_labels)
        d_real_loss = wasserstein_loss(d_output[:, :1], torch.ones(batch_size, 1, img_size, img_size))
        d_fake_loss = wasserstein_loss(d_output[:, 1:], torch.zeros(batch_size, 1, img_size, img_size))
        d_loss = d_real_loss + d_fake_loss
        d_loss.backward()
        d_optimizer.step()

        # Train the generator
        g_optimizer.zero_grad()
        g_output = discriminator(real_fake_images, real_fake_labels)
        g_loss = wasserstein_loss(g_output[:, 1:], torch.ones(batch_size, 1, img_size, img_size))
        g_loss.backward()
        g_optimizer.step()

RuntimeError: ignored

In [48]:
#### GANs
import torch
import torch.nn as nn
import torch.optim as optim

# Define the generator and discriminator networks
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        print(x)
        print(type(x))
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = nn.functional.relu(x)
        x = self.conv4(x)
        x = nn.functional.relu(x)
        x = self.conv5(x)
        x = nn.functional.sigmoid(x)
        return x

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(2, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = nn.functional.relu(x)
        x = self.conv4(x)
        x = nn.functional.relu(x)
        x = self.conv5(x)
        x = nn.functional.sigmoid(x)
        return x

# Set up the model, loss function, and optimizers
generator = Generator()
discriminator = Discriminator()
bce_loss = nn.BCELoss()
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)

# Train the model
for epoch in range(num_epochs):
    for t in train_data:
        images = torch.tensor(t['video'])
        labels = torch.tensor(t['label'])
        
        # Resize images and labels to match the input size of the generator
        images = images.resize_(batch_size, 3, img_size[0], img_size[1])
        labels = labels.resize_(batch_size, 1, img_size[0], img_size[1])

        # Create a "fake" segmentation map using the generator
        fake_labels = generator(images)

        # Concatenate the real and fake images and labels
        real_fake_images = torch.cat((images, fake_labels), 1)
        real_fake_labels = torch.cat((labels, labels), 1)

        # Train the discriminator
        d_optimizer.zero_grad()
        d_output = discriminator(real_fake_images, real_fake_labels)
        d_real_loss = bce_loss(d_output[:, :1], torch.ones(batch_size, 1, img_size, img_size))
        d_fake_loss = bce_loss(d_output[:, 1:], torch.zeros(batch_size, 1, img_size, img_size))
        d_loss = d_real_loss + d_fake_loss
        d_loss.backward()
        d_optimizer.step()

        # Train the generator
        g_optimizer.zero_grad()
        g_output = discriminator(real_fake_images, real_fake_labels)
        g_loss = bce_loss(g_output[:, 1:], torch.ones(batch_size, 1, img_size, img_size))
        g_loss.backward()
        g_optimizer.step()

tensor([[[[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]],

         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]],

         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 1, 1, 1],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]]],


        [[[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]],

         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0

RuntimeError: ignored

In [None]:
# make prediction for test
predictions = []
for d in test_data:
    prediction = np.array(np.zeros_like(d['video']), dtype=np.bool)
    height = prediction.shape[0]
    width = prediction.shape[1]
    prediction[int(height/2)-50:int(height/2+50), int(width/2)-50:int(width/2+50)] = True
    
    # DATA Strucure
    predictions.append({
        'name': d['name'],
        'prediction': prediction
        }
    )

In [None]:
# save in correct format
save_zipped_pickle(predictions, 'my_predictions.pkl')