In [1]:
# Attach Google Drive to this Colab VM under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Set this to the Drive-relative folder holding the unzipped
# assignment files, e.g. 'cse493g1/assignments/assignment2/'
FOLDERNAME = 'cse493g1/cse493g1project/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# With Drive mounted, add the project folder to the interpreter's module
# search path so that the Python files stored in it can be imported.
import sys
sys.path.append(f'/content/drive/My Drive/{FOLDERNAME}')

Mounted at /content/drive


In [2]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

from model_trainer import Trainer
from Model import GraphCaptioningModel
from model_utils import decode_captions, create_minibatch, encode_captions

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import ast
import re

class GraphImageDataset(Dataset):
    """Dataset of (image path, caption) string pairs read from CSV files.

    The rows of every CSV file are stacked into a single table, in the order
    the files are given. Each row must hold exactly two values: the first is
    returned as the image path and the second as the caption.
    """

    def __init__(self, csv_files, transform=None):
        """Read and concatenate the CSV files.

        Args:
            csv_files: A single CSV path, or an iterable of CSV paths.
            transform: Stored as ``self.transform``. It is not applied by
                ``__getitem__``.

        Raises:
            ValueError: If ``csv_files`` contains no path.
        """
        # A lone path would otherwise be iterated character by character.
        if isinstance(csv_files, str) or hasattr(csv_files, "__fspath__"):
            csv_files = [csv_files]
        frames = [pd.read_csv(csv_path) for csv_path in csv_files]
        if not frames:
            raise ValueError("csv_files must contain at least one CSV path")
        self.data = pd.concat(frames, ignore_index=True)
        self.transform = transform

    def __len__(self):
        """Return the total number of rows across all loaded CSV files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx`` as an ``(image_path, caption)`` pair of strings."""
        image_path, caption = self.data.iloc[idx]
        return str(image_path), str(caption)

In [4]:
# Eight CSV paths: the kk, cr, gv and sp files from the "small" dataset
# folder, followed by the same four tags from the "medium" folder (whose file
# names carry a "_medium" suffix).
csv_files2 = [
    f'/content/drive/My Drive/cse493g1/cse493g1project/datasets/{folder}/data_{tag}1{suffix}.csv'
    for folder, suffix in (('datasets_small', ''), ('datasets_medium', '_medium'))
    for tag in ('kk', 'cr', 'gv', 'sp')
]

# One dataset built from the rows of all eight files.
dataset_mixed = GraphImageDataset(csv_files=csv_files2)

In [5]:
import PIL
import torchvision.transforms.functional as F
from torchvision.utils import make_grid
from torchvision.io import read_image
import torchvision.transforms as transform
from pathlib import Path

# Load a random subset of dataset_mixed into memory:
#   raw_data_clr['features'] -> stacked image arrays
#   raw_data_clr['captions'] -> caption strings, in the same order
NUM_SAMPLES = 600
SAMPLE_SEED = 493

raw_data_clr = {}
clr_len = len(dataset_mixed)

# Draw indices WITHOUT replacement so no example appears twice; duplicates
# would otherwise be able to land in both the training and validation slices
# taken from this subset later on. A dedicated seeded generator makes the
# subset reproducible without touching NumPy's global random state.
rng = np.random.default_rng(SAMPLE_SEED)
sample_indices = rng.choice(clr_len, size=min(NUM_SAMPLES, clr_len), replace=False)

graph_list = []
caption_list = []
for i in sample_indices:
    graph_path, caption = dataset_mixed[i]
    # graph_path is appended verbatim to the datasets directory. The context
    # manager closes the file handle once the pixels have been converted.
    with PIL.Image.open('/content/drive/My Drive/cse493g1/cse493g1project/datasets' + graph_path) as image_file:
        graph = F.pil_to_tensor(image_file.convert('RGB'))
    graph_list.append(graph.numpy())
    caption_list.append(caption)

# np.array stacks the per-image arrays; this assumes every image has the same
# shape -- TODO confirm for the mixed small/medium datasets.
raw_data_clr['features'] = np.array(graph_list)
raw_data_clr['captions'] = np.array(caption_list)

In [None]:
# 'cuda' when torch can see a GPU, otherwise 'cpu'. The name is not referenced
# again in this cell.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from model_trainer import Trainer
from Model import GraphCaptioningModel
from model_utils import decode_captions, create_minibatch, encode_captions

# Seed torch and NumPy before the model and the trainer are constructed.
torch.manual_seed(493)
np.random.seed(493)

import gc

gc.collect()

# Vocabulary: three special tokens, then the strings '0'..'99', then the
# punctuation characters listed in `punc`.
punc = ['{', '}', '[', ']', '(', ')', ':', ',', ' ']
vocabulary = ['<NULL>', '<START>', '<END>']
vocabulary.extend(str(number) for number in range(100))
vocabulary.extend(punc)

data_clr = {
    'idx_to_word': vocabulary,
    # Inverse lookup: token -> position in the vocabulary list.
    'word_to_idx': {token: index for index, token in enumerate(vocabulary)},
}

encoded_captions = encode_captions(raw_data_clr['captions'][:600], data_clr['word_to_idx'])
features = raw_data_clr['features'][:600]

# First 500 samples train, the next 100 validate. The "test" entries reuse all
# 600 samples, i.e. the training and validation data together.
data_clr.update({
    'train_captions': encoded_captions[:500],
    'train_features': features[:500],
    'val_captions': encoded_captions[500:600],
    'val_features': features[500:600],
    'test_captions': encoded_captions,
    'test_features': features,
})

graph_model_clr = GraphCaptioningModel(
    word_to_idx=data_clr['word_to_idx'],
    wordvec_dim=256,
    max_length=600,
)

trainer_options = dict(
    num_epochs=5,
    batch_size=3,
    learning_rate=0.0005,
    verbose=True,
    print_every=10,
)
model_solver_clr = Trainer(
    graph_model_clr,
    data_clr,
    idx_to_word=data_clr['idx_to_word'],
    **trainer_options,
)

model_solver_clr.train()

# Plot the loss values recorded by the trainer.
fig, ax = plt.subplots()
ax.plot(model_solver_clr.loss_history)
ax.set(xlabel='Iteration', ylabel='Loss', title='Training loss history')
plt.show()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 147MB/s]


(Iteration 1 / 830) loss: 4.974954
(Iteration 11 / 830) loss: 2.453613
(Iteration 21 / 830) loss: 2.686307
(Iteration 31 / 830) loss: 1.896051
(Iteration 41 / 830) loss: 2.170106
(Iteration 51 / 830) loss: 2.573402
(Iteration 61 / 830) loss: 2.008980
(Iteration 71 / 830) loss: 2.137163
(Iteration 81 / 830) loss: 2.389022
(Iteration 91 / 830) loss: 1.762740
(Iteration 101 / 830) loss: 2.171813
(Iteration 111 / 830) loss: 2.256003
(Iteration 121 / 830) loss: 2.287055
(Iteration 131 / 830) loss: 2.147652


In [None]:
# For the validation and the training split, take a minibatch of one example,
# sample a caption from the model and display it with the ground truth.
for split in ['val', 'train']:
    minibatch = create_minibatch(data_clr, split=split, batch_size=1)
    gt_captions, batch_features = minibatch
    gt_captions = decode_captions(gt_captions.astype(int), data_clr['idx_to_word'])

    sample_captions = graph_model_clr.sample(batch_features, max_length=100)
    sample_captions = decode_captions(sample_captions, data_clr['idx_to_word'])

    # A distinct loop name (`image`) keeps the batch variable from being
    # overwritten while it is being iterated.
    for gt_caption, sample_caption, image in zip(gt_captions, sample_captions, batch_features):
        # The images were built with pil_to_tensor, i.e. channel-first
        # (C, H, W); this assumes create_minibatch returns them in that layout
        # -- TODO confirm. imshow expects (rows, cols, channels), so move the
        # channel axis last. A plain `.T` would also swap rows and columns and
        # draw the picture transposed.
        plt.imshow(np.asarray(image).transpose(1, 2, 0))
        plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
        plt.axis('off')
        plt.show()