In [2]:
import numpy as np
import argparse
import math
import os
import torch
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms
import pickle
# import lasagne
import matplotlib.pyplot as plt

In [3]:
# Functions provided by the website (https://patrykchrabaszcz.github.io/Imagenet32/)

def unpickle(file):
    """Read the pickled object stored at path ``file`` and return it.

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing, so
    this should only be pointed at trusted dataset files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle)

def load_databatch(data_folder, idx, img_size=64):
    """Load one training batch, centre it and append horizontally mirrored copies.

    Reads ``<data_folder>/train_data_batch_<idx>`` through ``unpickle`` and uses
    its ``'data'``, ``'labels'`` and ``'mean'`` entries.

    Args:
        data_folder: directory that holds the ``train_data_batch_*`` files.
        idx: batch number appended to the file-name prefix.
        img_size: side length of the square images.

    Returns:
        dict with
          * ``X_train``: array of shape (2 * N, 3, img_size, img_size) holding the
            N images (scaled by 1/255, mean subtracted) followed by their
            mirrored copies,
          * ``Y_train``: int32 array with the 0-based labels, repeated once for
            the mirrored copies,
          * ``mean``: the mean image scaled by 1/255.
    """
    batch = unpickle(os.path.join(data_folder, 'train_data_batch_') + str(idx))
    pixels, raw_labels, mean_image = batch['data'], batch['labels'], batch['mean']

    scale = np.float32(255)
    pixels = pixels / scale
    mean_image = mean_image / scale

    # Labels in the file start at 1; shift them so they start at 0.
    labels = [label - 1 for label in raw_labels]
    n_images = pixels.shape[0]

    # Subtract in place so that `pixels` keeps the dtype produced by the division.
    pixels -= mean_image

    # Each flat row is split into three consecutive planes of
    # img_size * img_size values, which become the channel axis.
    plane = img_size * img_size
    stacked = np.dstack((pixels[:, :plane],
                         pixels[:, plane:2 * plane],
                         pixels[:, 2 * plane:]))
    images = stacked.reshape((stacked.shape[0], img_size, img_size, 3)).transpose(0, 3, 1, 2)

    # Mirrored copies: reverse the last (width) axis; labels are simply repeated.
    originals = images[0:n_images, :, :, :]
    mirrored = originals[:, :, :, ::-1]
    base_labels = labels[0:n_images]

    return dict(
        X_train=np.concatenate((originals, mirrored), axis=0),
        Y_train=np.concatenate((base_labels, base_labels), axis=0).astype('int32'),
        mean=mean_image)

In [4]:
# Load the first training batch as stored on disk and pull out its three entries.
d = unpickle('/Users/dj/Desktop/MT_Desktop/ImageNet/ImageNet64/Imagenet64_train/train_data_batch_1')
x, y, mean_image = d['data'], d['labels'], d['mean']

In [6]:
# Preprocess the first training batch (scaled, mean-centred, plus mirrored copies).
dat_batch = load_databatch(
    data_folder='/Users/dj/Desktop/MT_Desktop/ImageNet/ImageNet64/Imagenet64_train',
    idx=1,
    img_size=64,
)

In [7]:
# Step-by-step version of the batch preprocessing, run at module level so the
# intermediate arrays can be inspected in later cells.
img_size = 64
data_file = os.path.join('/Users/dj/Desktop/MT_Desktop/ImageNet/ImageNet64/Imagenet64_train', 'train_data_batch_')

d = unpickle(data_file + str(1))
x, y, mean_image = d['data'], d['labels'], d['mean']

# Bring the pixel values and the mean image to the same 1/255 scale.
x = x / np.float32(255)
mean_image = mean_image / np.float32(255)

# Labels are indexed from 1, shift them so that indexes start at 0
y = [label - 1 for label in y]
data_size = x.shape[0]

# Centre the data in place (x keeps the dtype produced by the division above).
x -= mean_image

img_size2 = img_size * img_size

# Split every flat row into its three consecutive planes, then rearrange to
# (num_images, 3, img_size, img_size).
x = np.dstack((x[:, :img_size2],
               x[:, img_size2:2 * img_size2],
               x[:, 2 * img_size2:]))
x = x.reshape((x.shape[0], img_size, img_size, 3)).transpose(0, 3, 1, 2)

In [21]:
# Shape of the preprocessed batch: (num_images, channels, height, width).
x.shape

(128116, 3, 64, 64)

In [9]:
# A single image is stored channel-first: (channels, height, width).
x[0].shape

(3, 64, 64)

In [5]:
# Show one preprocessed image together with its label.
# load_databatch() returns the keys 'X_train', 'Y_train' and 'mean'; it has no
# 'X_test' / 'Y_test' entries, so the training arrays are indexed here.
s = 4  # index of the sample to display
image = torch.tensor(dat_batch['X_train'][s])  # 34
print(dat_batch['Y_train'][s])
# imshow expects (height, width, channels) while the array is channel-first.
# NOTE: the pixel values are mean-centred, so matplotlib clips them to its
# valid float range when rendering.
plt.imshow(image.permute(1, 2, 0));

NameError: name 'dat_batch' is not defined

# Advanced Functions

In [17]:
def unpickle(file):
    """Return the object deserialized from the pickle file at ``file``.

    NOTE: unpickling can run arbitrary code; only use this on trusted files.
    """
    with open(file, 'rb') as stream:
        contents = pickle.load(stream)
    return contents

def load_databatch(data_folder, idx, img_size=64):
    """Read training batch ``idx`` and return it centred, with mirrored copies added.

    The file ``<data_folder>/train_data_batch_<idx>`` is read through
    ``unpickle``; its ``'data'``, ``'labels'`` and ``'mean'`` entries are used.

    Args:
        data_folder: directory containing the ``train_data_batch_*`` files.
        idx: number of the batch to read.
        img_size: side length of the square images.

    Returns:
        dict with keys ``X_train`` (shape (2 * N, 3, img_size, img_size): the
        scaled, mean-subtracted images followed by their left-right flipped
        copies), ``Y_train`` (int32, 0-based labels repeated for the flipped
        copies) and ``mean`` (mean image scaled by 1/255).
    """
    prefix = os.path.join(data_folder, 'train_data_batch_')
    contents = unpickle(prefix + str(idx))
    flat = contents['data']
    file_labels = contents['labels']
    mean_image = contents['mean']

    divisor = np.float32(255)
    flat = flat / divisor
    mean_image = mean_image / divisor

    # The file counts labels from 1; shift so they start at 0.
    zero_based = [lbl - 1 for lbl in file_labels]
    data_size = flat.shape[0]

    # In-place subtraction: `flat` keeps its own dtype.
    flat -= mean_image

    n_pixels = img_size * img_size
    # Three consecutive planes per row -> trailing axis of length 3.
    planes = (flat[:, :n_pixels], flat[:, n_pixels:2 * n_pixels], flat[:, 2 * n_pixels:])
    images = np.dstack(planes)
    images = images.reshape((images.shape[0], img_size, img_size, 3))
    images = images.transpose(0, 3, 1, 2)

    # Augment with copies flipped along the last axis; labels are duplicated.
    kept = images[0:data_size, :, :, :]
    kept_labels = zero_based[0:data_size]
    X_train = np.concatenate((kept, kept[:, :, :, ::-1]), axis=0)
    Y_train = np.concatenate((kept_labels, kept_labels), axis=0)

    return dict(
        X_train=X_train,
        Y_train=Y_train.astype('int32'),
        mean=mean_image)


def load_data(input_file, train=True, mean_image=None, img_size=64):
    """Load one pickled batch as channel-first images plus 0-based labels.

    Args:
        input_file: path of the pickled batch. It must hold ``'data'`` and
            ``'labels'`` and, when ``train`` is true, ``'mean'``.
        train: when true, the ``'mean'`` entry stored in the file (scaled by
            1/255) is subtracted from the images.
        mean_image: optional mean image that is ALREADY scaled by 1/255 (for
            example the ``'mean'`` value returned by ``load_databatch``). It is
            only used when ``train`` is false; when ``train`` is true the
            file's own mean takes precedence.
        img_size: side length of the square images.

    Returns:
        ``(x, y)`` where ``x`` has shape (N, 3, img_size, img_size), scaled by
        1/255 and mean-subtracted if a mean is available, and ``y`` is the list
        of labels shifted to start at 0.

    Note:
        With ``train=False`` and no ``mean_image`` the images are returned
        scaled but NOT centred. Previously this path raised UnboundLocalError
        because the mean was used without ever being assigned.
    """
    d = unpickle(input_file)
    x = d['data']
    y = d['labels']
    if train:
        mean_image = d['mean'] / np.float32(255)

    x = np.divide(x, np.float32(255))

    # Labels are indexed from 1, shift it so that indexes start at 0
    y = [i - 1 for i in y]

    # Only centre when a mean is actually available; done in place so that x
    # keeps the dtype produced by the division above.
    if mean_image is not None:
        x -= mean_image

    img_size2 = img_size * img_size

    # Split each flat row into its three consecutive planes and rearrange to
    # (N, 3, img_size, img_size).
    x = np.dstack((x[:, :img_size2], x[:, img_size2:2*img_size2], x[:, 2*img_size2:]))
    x = x.reshape((x.shape[0], img_size, img_size, 3)).transpose(0, 3, 1, 2)

    return x, y

def get_train_dat(p):
    """Load every batch file found in directory ``p`` and merge the results.

    Args:
        p: directory containing the pickled training batch files. A trailing
            path separator is optional.

    Returns:
        ``(x_train, y_train)``: the image arrays of all batches concatenated
        along axis 0 and the concatenated label list, both in sorted file-name
        order.

    Raises:
        FileNotFoundError: if ``p`` contains no batch file to load.
    """
    # Sort for a reproducible order, and skip hidden entries (e.g. '.DS_Store')
    # and sub-directories, which cannot be unpickled as batches.
    batch_names = sorted(
        name for name in os.listdir(p)
        if not name.startswith('.') and os.path.isfile(os.path.join(p, name))
    )
    if not batch_names:
        raise FileNotFoundError('no batch files found in ' + repr(p))

    x_parts = []
    y_train = []
    for counter, name in enumerate(batch_names, start=1):
        print(counter)  # progress: number of the batch being loaded
        # os.path.join works whether or not `p` ends with a separator.
        x, y = load_data(os.path.join(p, name))
        x_parts.append(x)
        y_train.extend(y)

    # Concatenate once at the end instead of re-copying the growing array
    # for every batch.
    x_train = np.concatenate(x_parts, axis=0)
    return x_train, y_train

def create_tuples(x,y):
    """Pair every image in ``x`` with its label from ``y``.

    Returns a list with one ``(tensor, label)`` tuple per entry of ``x``: the
    image converted with ``torch.Tensor`` and viewed as (3, 64, 64), and the
    label converted to a plain ``int``.
    """
    return [
        (torch.Tensor(x[idx]).view(3, 64, 64), int(y[idx]))
        for idx in range(len(x))
    ]

def get_test_dat(p):
    """Load the batch file at ``p`` in non-training mode and return its samples.

    The file is read with ``load_data(p, train=False)`` and the resulting
    images and labels are turned into a list of tuples by ``create_tuples``.
    """
    images, labels = load_data(p, train = False)
    return create_tuples(images, labels)

def create_imnet_train_test(path_train, path_test, batch_size=10):
    """Build shuffled DataLoaders for the training batches and the test file.

    Args:
        path_train: directory with the training batch files, handed to
            ``get_train_dat``.
        path_test: path of the single test/validation batch file, handed to
            ``get_test_dat``.
        batch_size: number of samples per batch for both loaders.

    Returns:
        ``(trainset, testset)``: two ``torch.utils.data.DataLoader`` objects
        over lists of ``(tensor, label)`` tuples.
    """
    x_train, y_train = get_train_dat(path_train)
    dat_train = create_tuples(x_train, y_train)
    trainset = torch.utils.data.DataLoader(dat_train, batch_size=batch_size, shuffle=True)

    # Load the test file through get_test_dat, i.e. load_data(..., train=False).
    # Calling load_data with its default train=True makes it read a 'mean'
    # entry from the file.
    # NOTE(review): the validation file is presumed not to contain 'mean' —
    # confirm against the dataset files.
    dat_test = get_test_dat(path_test)
    testset = torch.utils.data.DataLoader(dat_test, batch_size=batch_size, shuffle=True)

    return trainset, testset



In [19]:
# Directory with the training batch files, and the single validation file.
path_train = '/Users/dj/Desktop/MT_Desktop/ImageNet/ImageNet64/Imagenet64_train/'
path_test = '/Users/dj/Desktop/MT_Desktop/ImageNet/ImageNet64/val_data'

# Build both DataLoaders; this loads every training batch into memory first.
trainset, testset = create_imnet_train_test(path_train, path_test)

1


KeyboardInterrupt: 