In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# dataset files referenced by later cells can be read from that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install idx2numpy package for extracting data
!pip install idx2numpy

Collecting idx2numpy
  Downloading https://files.pythonhosted.org/packages/7e/6b/80628f6cc2f44d80b27f1ef7b57b257ed4c73766113b77d13ad110c091b4/idx2numpy-1.2.3.tar.gz
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-cp36-none-any.whl size=7905 sha256=86e2652dea9daf9fcce864e80d96c173414445d4c031494038626702ec92c2d4
  Stored in directory: /root/.cache/pip/wheels/7a/c1/da/284ce80a748fab898b8d1fa95468a386e7cf3b81da18511f9d
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3


In [3]:
# Import packages
import os
import json
import gzip
import torch
import torchvision
import numpy as np 
import pandas as pd

import idx2numpy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [4]:
def load_one_dataset(path):
    '''
    Load a single gzip-compressed idx file as a torch tensor.

    Args:
        path: location of the ``.gz`` file to read.

    Returns:
        A ``torch.Tensor`` built from the numpy array that
        ``idx2numpy.convert_from_file`` produces for the decompressed
        stream. The dtype is whatever the idx file declares; callers
        cast it as needed.

    Raises:
        Whatever ``gzip.open`` or ``idx2numpy.convert_from_file`` raise
        for a missing or malformed file.
    '''
    # The context manager closes the handle even when parsing fails;
    # the previous explicit close() was skipped on any exception.
    with gzip.open(path, 'rb') as f:
        data = torch.from_numpy(idx2numpy.convert_from_file(f))

    return data


def load_all_datasets(train_imgs, train_labs, test_imgs, test_labs, batch_size):
    '''
    Build the training and test data loaders.

    Args:
        train_imgs: path to the gzipped idx file with the training images.
        train_labs: path to the gzipped idx file with the training labels.
        test_imgs: path to the gzipped idx file with the test images.
        test_labs: path to the gzipped idx file with the test labels.
        batch_size: number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``. Each loader iterates over
        ``(image, label)`` pairs, with images cast to ``float32`` and labels
        to ``long``. Only the training loader shuffles.
    '''
    def _as_dataset(imgs_path, labs_path):
        # Pair every image with its label lazily via indexing.
        images = load_one_dataset(imgs_path).to(torch.float32)
        labels = load_one_dataset(labs_path).to(torch.long)
        # TensorDataset avoids materialising one Python tuple per sample
        # (as list(zip(...)) did) and rejects mismatched image/label counts
        # instead of silently truncating to the shorter one.
        return torch.utils.data.TensorDataset(images, labels)

    train = _as_dataset(train_imgs, train_labs)
    test = _as_dataset(test_imgs, test_labs)

    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)

    return (train_loader, test_loader)

In [7]:
# Scratch setup used while trying out the noise function
data_dir = '/content/drive/MyDrive/data'

# Keyword-argument name -> full path of the gzipped idx archive it points to
paths = {
    arg_name: os.path.join(data_dir, archive)
    for arg_name, archive in [
        ('train_imgs', 'train-images-idx3-ubyte.gz'),
        ('train_labs', 'train-labels-idx1-ubyte.gz'),
        ('test_imgs', 't10k-images-idx3-ubyte.gz'),
        ('test_labs', 't10k-labels-idx1-ubyte.gz'),
    ]
}

# Build the train/test loaders with batches of 32 samples
train_loader, test_loader = load_all_datasets(batch_size=32, **paths)

  


In [59]:
def add_noise(img, i, j, h, w, v):
  '''
  Return a copy of ``img`` with 1 or 2 distinct quadrants set to zero.

  The number of quadrants (1 or 2) and which ones are erased are drawn
  from numpy's global random state, so seed it with ``np.random.seed``
  for reproducible output. ``img`` itself is never modified.

  Args:
    img: tensor whose last two dimensions are the image rows and columns.
      The quadrant table below uses 14x14 blocks, so this presumably
      expects 28x28 images -- TODO confirm against the caller.
    i, j, h, w, v: currently ignored. The erase regions come from the
      local ``quadrants`` table; the parameters are kept so existing
      callers keep working.

  Returns:
    A new tensor of the same shape and dtype as ``img``.
  '''
  # Quadrant id -> [top row, left column, height, width, fill value].
  # Store the quadrant definitions: move this into training loop later
  quadrants = {
      1: [0, 0, 14, 14, 0],
      2: [0, 14, 14, 14, 0],
      3: [14, 0, 14, 14, 0],
      4: [14, 14, 14, 14, 0],
  }

  # Get the number of quadrants to erase
  n_quads_to_erase = np.random.choice([1, 2])

  # Get which quadrants to erase. replace=False is required: sampling with
  # replacement can pick the same quadrant twice, which would erase only
  # one quadrant when two were requested.
  quads_to_erase = np.random.choice([1, 2, 3, 4], size=n_quads_to_erase, replace=False)

  # Copy once up front; every erase below then works in place on the copy
  noisy_img = img.clone()

  # Now erase the quadrants. Slice assignment over the last two dimensions
  # fills each region without re-cloning the image for every quadrant.
  for quad in quads_to_erase:
    top, left, height, width, fill = quadrants[int(quad)]
    noisy_img[..., top:top + height, left:left + width] = fill

  return noisy_img