In [1]:
!mkdir dataset
!gdown 1xijq32XfEm6FPhUb7RsZYWHc2UuwVkiq
!mv refcocog.tar.gz ./dataset/
!ls dataset

Downloading...
From: https://drive.google.com/uc?id=1xijq32XfEm6FPhUb7RsZYWHc2UuwVkiq
To: /content/refcocog.tar.gz
100% 13.5G/13.5G [05:34<00:00, 40.2MB/s]
refcocog.tar.gz


In [2]:
!tar -xf dataset/refcocog.tar.gz -C dataset
!ls dataset

refcocog  refcocog.tar.gz


In [4]:
import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
import os

In [5]:
class RefCOCOgDataset(Dataset):
  """A simple dataset representing the numbers from 0 to size-1.

  Every element is the float32 tensor ``[idx]``, optionally passed through
  ``transform``. ``root`` is user-expanded and stored, but this class does not
  read anything from it.

  Args:
    size: number of elements reported by ``len()``
    root: dataset directory (``~`` is expanded)
    transform: optional callable applied to each element
    split: if True, ``__getitem__`` returns the element divided into a
      (training, test) pair; if False, it returns the element itself
  """

  def __init__(self, size, root, transform=None, split=True):
    super().__init__()
    self.root = os.path.expanduser(root)
    self.size = size
    self.transform = transform
    self.split = split

  def __getitem__(self, idx):
    """Get an item given its id.

    Args:
      idx: the integral index of the element to retrieve, in [0, size)

    Returns:
      the (transformed) element at index idx when ``split`` is False,
      otherwise a ``(training_data, test_data)`` pair produced by
      ``torch.utils.data.random_split`` on that element

    Raises:
      IndexError: if idx is outside [0, size)
    """
    # Without an IndexError past the end, plain iteration or tuple-unpacking
    # over the dataset would never stop.
    if not 0 <= idx < self.size:
      raise IndexError(f"index {idx} is out of range for a dataset of size {self.size}")

    result = torch.tensor([idx], dtype=torch.float32)

    # if a transformation is available, we apply it
    if self.transform is not None:
      result = self.transform(result)

    # nothing to split: hand back the element itself
    if not self.split:
      return result

    # create train and test splits (80/20); the clamp keeps the training count
    # from exceeding the element's length when it is empty
    num_samples = len(result)
    training_samples = min(num_samples, int(num_samples * 0.8 + 1))
    test_samples = num_samples - training_samples
    training_data, test_data = torch.utils.data.random_split(result, [training_samples, test_samples])

    return training_data, test_data

  def __len__(self):
    """Get the length of the dataset.

    Returns:
      number of elements that compose the dataset
    """
    return self.size

In [6]:
def data_augmentation():
  """Build the preprocessing/augmentation pipeline applied to each image.

  The steps run in this order: colour jitter, resize to 256 x 256, random
  224 x 224 crop, conversion to a tensor, per-channel normalization.

  Returns:
    a ``T.Compose`` object chaining the transformations above
  """
  # per-channel statistics used by the final normalization step
  channel_mean = [0.4913, 0.4821, 0.4465]
  channel_std = [0.2470, 0.2434, 0.2615]

  return T.Compose([
      # small random perturbations of brightness, contrast and saturation
      T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
      # bring every image to 256 x 256, then take a random 224 x 224 window
      T.Resize((256, 256)),
      T.RandomCrop((224, 224)),
      # switch from image objects to tensors before normalizing
      T.ToTensor(),
      T.Normalize(mean=channel_mean, std=channel_std),
  ])

In [7]:
def get_data(root, file_size, batch_size, test_batch_size=256):
  """Build the training and test dataloaders.

  Args:
    root: dataset directory, forwarded to ``RefCOCOgDataset``
    file_size: forwarded to ``RefCOCOgDataset`` as its ``size`` (dataset length)
    batch_size: batch size of the training loader
    test_batch_size: batch size of the test loader

  Returns:
    a ``(train_loader, test_loader)`` pair
  """
  # data_augmentation is a factory: call it so the dataset receives the
  # composed transform instead of the function object itself
  transform = data_augmentation()

  # NOTE(review): the `split` flag is forwarded as True exactly as before;
  # confirm how it should interact with the dataset-level split done below.
  dataset = RefCOCOgDataset(file_size, root, transform, True)

  # create train and test splits (80/20) over the whole dataset; a Dataset
  # instance cannot simply be tuple-unpacked into two parts
  num_samples = len(dataset)
  training_samples = int(num_samples * 0.8)
  test_samples = num_samples - training_samples
  training_data, test_data = torch.utils.data.random_split(dataset, [training_samples, test_samples])

  # initialize dataloaders
  train_loader = torch.utils.data.DataLoader(training_data, batch_size, shuffle=True, drop_last=True)
  test_loader = torch.utils.data.DataLoader(test_data, test_batch_size, shuffle=False)

  return train_loader, test_loader

In [None]:
def main(batch_size=128,
         device='cuda:0',
         learning_rate=0.001,
         weight_decay=0.000001,
         momentum=0.9,
         epochs=50,
         num_classes=65,
         visualization_name='alexnet_sgd',
         root=None,
         file_size=None):
  """Entry point: open a TensorBoard writer and build the dataloaders.

  Only ``batch_size``, ``root`` and ``file_size`` are used so far; the other
  parameters (device, optimizer settings, epochs, num_classes,
  visualization_name) are accepted but not read yet.

  Args:
    batch_size: batch size of the training loader
    root: dataset directory, forwarded to ``get_data``
    file_size: dataset size, forwarded to ``get_data``

  Raises:
    ValueError: if ``root`` or ``file_size`` is not provided
  """
  # get_data needs both values; fail early with a clear message instead of a
  # TypeError from a missing positional argument further down
  if root is None or file_size is None:
    raise ValueError("main() needs both `root` and `file_size` to build the dataloaders")

  writer = SummaryWriter(log_dir="runs/exp1")
  try:
    # instantiates dataloaders
    # NOTE(review): test_batch_size=None makes DataLoader skip automatic
    # batching for the test loader - confirm this is intended.
    train_loader, test_loader = get_data(root, file_size, batch_size=batch_size, test_batch_size=None)
  finally:
    # flush and release the event file even if loader construction fails
    writer.close()

# NOTE(review): 14495514624 is 13.5 GiB expressed in bytes, and get_data uses
# file_size as the dataset length - confirm that a byte count is what is meant.
#main(root="/content/dataset/refcocog", file_size=14495514624)