<a href="https://colab.research.google.com/github/aevangeline/forensics/blob/master/10701_Work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


Aurelia and Caroline - 10701

In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243


In [2]:
!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali

Looking in indexes: https://pypi.org/simple, https://developer.download.nvidia.com/compute/redist/cuda/10.0
Collecting nvidia-dali
[?25l  Downloading https://developer.download.nvidia.com/compute/redist/cuda/10.0/nvidia-dali/nvidia_dali-0.15.0-947079-cp36-cp36m-manylinux1_x86_64.whl (43.3MB)
[K     |████████████████████████████████| 43.3MB 220kB/s 
Installing collected packages: nvidia-dali
Successfully installed nvidia-dali-0.15.0


In [0]:
from zipfile import ZipFile
import pathlib
from urllib.request import urlretrieve
import os
from os import remove
import os.path
import numpy as np
import pandas as pd
import shutil as sh
import torch
import glob
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

In [0]:
class ExternalInputIterator(object):
    def __init__(self, batch_size, data_file, image_dir, shuffle_files=True):
        self.images_dir = image_dir
        self.batch_size = batch_size
        self.data_file = data_file
        self.shuffle_files = shuffle_files
        with open(self.data_file, 'r') as f:
            self.files = [line.rstrip() for line in f if line is not '']
        self.idxs = []

    def __iter__(self):
        self.n = len(self.files)
        return self

    def __next__(self):
        batch = []
        labels = []
        for _ in range(self.batch_size):
            jpeg_filename, text, *label = self.files[self.get_idx()].split(' ')
            f = open(image_dir + jpeg_filename, 'rb')
            batch.append(np.frombuffer(f.read(), dtype = np.uint8))
            labels.append(np.array(label, dtype = np.uint8))
        return (batch, labels)
    
    def get_idx(self):
        if len(self.idxs) == 0:
            print("Shuffling")
            self.idxs = list(range(self.n))
            if self.shuffle_files:
                shuffle(self.idxs)
        return self.idxs.pop()
      
    next = __next__

In [0]:
class ExternalInputDataset(data.Dataset):
    def __init__(self, batch_size, data_file, image_dir, shuffle_files=True):
        self.images_dir = image_dir
        self.batch_size = batch_size
        self.data_file = data_file
        self.shuffle_files = shuffle_files
        with open(self.data_file, 'r') as f:
            self.files = [line.rstrip() for line in f if line is not '']
        self.idxs = []

    def __len__(self):
        return len(self.files)
        
    def __getitem__(self, index):
        jpeg_filename, *label = self.files[index].split(' ')
        f = open(image_dir + jpeg_filename, 'rb')
        image = np.frombuffer(f.read(), dtype = np.uint8)
        label = np.array(label, dtype = np.uint8)
        return image, label

In [0]:
!wget -cq https://s3.amazonaws.com/content.udacity-data.com/courses/nd188/flower_data.zip
!unzip -qq flower_data.zip
!mkdir -p ./flower_data/flower_data_flat
!find ./flower_data/train -mindepth 2 -type f -exec mv -t ./flower_data/flower_data_flat -i '{}' +

In [0]:
image_dir = "./flower_data/flower_data_flat/"

In [0]:
from os import listdir
from os.path import isfile, join
image_files = [f for f in listdir(image_dir) if isfile(join(image_dir, f))]

In [0]:
image_files

['image_00804.jpg',
 'image_02296.jpg',
 'image_07560.jpg',
 'image_06496.jpg',
 'image_06278.jpg',
 'image_02250.jpg',
 'image_02036.jpg',
 'image_04541.jpg',
 'image_00326.jpg',
 'image_04518.jpg',
 'image_06809.jpg',
 'image_00813.jpg',
 'image_04479.jpg',
 'image_02985.jpg',
 'image_03318.jpg',
 'image_02673.jpg',
 'image_07203.jpg',
 'image_07991.jpg',
 'image_03076.jpg',
 'image_02499.jpg',
 'image_00975.jpg',
 'image_01188.jpg',
 'image_05239.jpg',
 'image_04291.jpg',
 'image_03167.jpg',
 'image_01499.jpg',
 'image_07503.jpg',
 'image_01884.jpg',
 'image_00803.jpg',
 'image_03213.jpg',
 'image_06590.jpg',
 'image_07070.jpg',
 'image_06499.jpg',
 'image_05963.jpg',
 'image_05533.jpg',
 'image_04422.jpg',
 'image_01657.jpg',
 'image_06967.jpg',
 'image_07990.jpg',
 'image_05010.jpg',
 'image_02728.jpg',
 'image_02861.jpg',
 'image_03415.jpg',
 'image_01437.jpg',
 'image_07762.jpg',
 'image_04510.jpg',
 'image_06834.jpg',
 'image_07332.jpg',
 'image_06680.jpg',
 'image_00127.jpg',


In [0]:
data_frame = pd.DataFrame(list(zip(image_files, 
                               list(range(len(image_files))), 
                               list(range(len(image_files))))), 
                          columns=['image_filename', 'label_1', 'label_2'])

In [0]:
data_frame.head()

Unnamed: 0,image_filename,label_1,label_2
0,image_00804.jpg,0,0
1,image_02296.jpg,1,1
2,image_07560.jpg,2,2
3,image_06496.jpg,3,3
4,image_06278.jpg,4,4


In [0]:
import PIL.Image

In [0]:
for file in data_frame['image_filename']:
  PIL.Image.open(image_dir + file).convert('RGB')

In [0]:
data_frame.to_csv('dummy_data.ssv', index=False, header=False, sep=" ")

In [0]:
dataset = ExternalInputDataset(batch_size=16, 
                            data_file='dummy_data.ssv', 
                            image_dir=image_dir)

In [0]:
dataset[0]

(array([255, 216, 255, ..., 103, 255, 217], dtype=uint8),
 array([0, 0], dtype=uint8))

In [0]:
def collate_fn(batch):
  images = [item[0] for item in batch]
  labels = [item[1] for item in batch]
  return images, labels

In [0]:
dataset_loader = data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=1, pin_memory=False, collate_fn=collate_fn)

In [0]:
iter(dataset_loader).next()

([array([255, 216, 255, ..., 143, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  63, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  89, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  96, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 200, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 159, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,   3, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  63, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 177, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  63, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  79, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 159, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 104, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,  88, 255, 217], dtype=uint8),
  array([255, 216, 255, ...,   3, 255, 217], dtype=uint8),
  array([255, 216, 255, ..., 127, 255, 217], dtype=uint8)],
 [array([13, 13], dtype=uint8),
  array([88, 88], dtype

In [0]:
eii = ExternalInputIterator(batch_size=16, 
                            data_file='dummy_data.ssv', 
                            image_dir=image_dir)
iterator = iter(eii)

In [0]:
im, lab = next(iterator)

Shuffling


In [0]:
im

[array([255, 216, 255, ..., 127, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,   1, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 127, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 136, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 103, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 159, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,  92, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 207, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,  81, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,  71, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,   8, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,   7, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 143, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,  51, 255, 217], dtype=uint8),
 array([255, 216, 255, ...,  63, 255, 217], dtype=uint8),
 array([255, 216, 255, ..., 127, 255, 217], dtype=uint8)]

In [0]:
class ExternalSourcePipeline(Pipeline):
    def __init__(self, source, batch_size, num_threads, device_id):
        super(ExternalSourcePipeline, self).__init__(batch_size,
                                      num_threads,
                                      device_id,
                                      seed=12)
        self.source = source
        self.source_iter = iter(source)
        self.input = ops.ExternalSource()
        self.input_label = ops.ExternalSource()
        self.decode = ops.nvJPEGDecoder(device = "mixed", output_type = types.RGB)
        self.res = ops.Resize(device="gpu", resize_x=224, resize_y=224, interp_type=types.INTERP_TRIANGULAR)
        self.cast = ops.Cast(device = "gpu", dtype = types.INT32)

    def define_graph(self):
        self.jpegs = self.input()
        self.labels = self.input_label()
        images = self.decode(self.jpegs)
        output = self.res(images)
        return (output, self.labels)

    def iter_setup(self):
        try:
          p = self.source_iter.next()
        except:
          print("Exception occured")
          self.source_iter = iter(self.source)
          p = self.source_iter.next()
        images, labels = p
        self.feed_input(self.jpegs, images)
        self.feed_input(self.labels, labels)

In [0]:
pipe = ExternalSourcePipeline(source=dataset_loader, batch_size=16, num_threads=4, device_id = 0)
pipe.build()



In [0]:
from nvidia.dali.plugin.pytorch import DALIGenericIterator

In [0]:
len(dataset)

6552

In [0]:
len(dataset_loader)

410

In [0]:
dali_iter = DALIGenericIterator([pipe], ['images', 'labels'], 400*16)

In [0]:
#%%time
import time
start = time.time()
for epoch in range(2):
  for i, it in enumerate(dali_iter, 5):
    batch_data = it[0]
    images, labels = batch_data["images"], batch_data["labels"]
    #print(len(images))
  dali_iter.reset()
print(time.time() - start)

RuntimeError: ignored