In [18]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Training settings
# Help strings use argparse's %(default)s placeholder so the documented default
# always matches the real one (the --epochs help previously claimed 15 while
# the actual default was 30).
parser = argparse.ArgumentParser(description='Galaxy zoo project')
parser.add_argument('--data', type=str, default='data', metavar='D',
                    help="folder where data is located. train_data.zip and test_data.zip need to be found in the folder")
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=30, metavar='N',
                    help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: %(default)s)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: %(default)s)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')


# Inside a notebook sys.argv belongs to the kernel, so parse an explicit
# argument list instead. An empty list selects every default; to override,
# use e.g. ['--epochs', '5'] (a list of tokens, not a single string).
input_args = []
args = parser.parse_args(input_args)
# Seed PyTorch's global RNG; manual_seed returns the Generator object,
# which the notebook displays as the cell's output.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x194a932af10>

In [3]:
### Data Initialization and Loading
# Project-local helpers; data_loader.py is expected next to this notebook.
from data_loader import initialize_data, data_transforms
# Per the original note: extracts the zip files and makes a validation set.
initialize_data(args.data)

# Shuffled mini-batches over the class-per-subfolder image tree at args.data.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.ImageFolder(root=args.data, transform=data_transforms),
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=1,
)

data/images_training_rev1.zip


In [6]:
# Peek at the first mini-batch only: the loop body always ends with `break`,
# so exactly one batch is pulled from the loader.
for i, (data, target) in enumerate(train_loader):
    # One value per line: labels, their shape, the image tensor, its shape.
    print(target, target.shape, data, data.shape, sep="\n")
    break

tensor([0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0])
torch.Size([64])
tensor([[[[0.0078, 0.0039, 0.0157,  ..., 0.0157, 0.0196, 0.0196],
          [0.0235, 0.0078, 0.0118,  ..., 0.0118, 0.0157, 0.0196],
          [0.0314, 0.0118, 0.0039,  ..., 0.0039, 0.0078, 0.0118],
          ...,
          [0.0431, 0.0314, 0.0235,  ..., 0.0078, 0.0078, 0.0039],
          [0.0275, 0.0275, 0.0235,  ..., 0.0039, 0.0000, 0.0000],
          [0.0235, 0.0235, 0.0235,  ..., 0.0039, 0.0000, 0.0000]],

         [[0.0078, 0.0039, 0.0157,  ..., 0.0118, 0.0157, 0.0157],
          [0.0235, 0.0078, 0.0118,  ..., 0.0078, 0.0118, 0.0157],
          [0.0314, 0.0118, 0.0039,  ..., 0.0000, 0.0039, 0.0078],
          ...,
          [0.0314, 0.0196, 0.0078,  ..., 0.0078, 0.0039, 0.0039],
          [0.0235, 0.0157, 0.0078,  ..., 0.0000, 0.0000, 0.0000],
        

In [10]:
# Both label files are read from the notebook's working directory.
read_label_ids = np.load('label_ids.npy')
# `.item()` unwraps a 0-d object array into the Python object it holds
# (later cells index it as read_label_values[id][k]). Object arrays are stored
# pickled, and NumPy >= 1.16.3 refuses to load them unless allow_pickle=True.
# SECURITY: unpickling can execute arbitrary code -- only load label files
# that you generated yourself or otherwise trust.
read_label_values = np.load('label_values.npy', allow_pickle=True).item()

In [11]:
# Display the loaded ID array (NumPy abbreviates long arrays in the repr).
read_label_ids

array([100008, 100023, 100053, ..., 999958, 999964, 999967], dtype=int64)

In [17]:
# Check the Python type of a single label entry (ID 100008, first value).
type(read_label_values[100008][0])

float

In [39]:
# Sub-folder under 'data/' that holds the training JPEGs; read as a global
# by Dataset.__getitem__ when it builds each image path.
image_folder = 'images_training_rev1'

import torch
from torch.utils import data

class Dataset(data.Dataset):
    """Map-style PyTorch dataset that loads one JPEG per sample ID from disk.

    Each sample is the pair ``(image, label)`` where ``image`` is whatever
    ``plt.imread`` returns for ``<folder>/<ID>.jpg`` (optionally passed through
    ``transforms``) and ``label`` is ``labels[ID]`` exactly as stored.
    """

    def __init__(self, list_IDs, labels, transforms=None, image_dir=None):
        """Store the sample index and label lookup.

        Args:
            list_IDs: indexable sequence of sample IDs; its length is the
                dataset length.
            labels: mapping (or other ``[]``-indexable object) from ID to label.
            transforms: optional callable applied to the loaded image.
            image_dir: optional folder containing ``<ID>.jpg`` files. When left
                as ``None`` the folder is ``'data/' + image_folder``, using the
                module-level ``image_folder`` variable.
        """
        self.labels = labels
        self.list_IDs = list_IDs
        self.transforms = transforms
        self.image_dir = image_dir

    def __len__(self):
        """Return the total number of samples."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load and return the ``(image, label)`` pair at position ``index``.

        Raises whatever ``list_IDs[index]``, ``plt.imread`` or ``labels[ID]``
        raise (e.g. IndexError past the end, which also terminates plain
        ``for`` iteration over the dataset).
        """
        sample_id = self.list_IDs[index]

        # Resolve the folder at access time so that a later change to the
        # global `image_folder` is still honoured when no image_dir was given.
        if self.image_dir is not None:
            folder = self.image_dir
        else:
            folder = 'data/' + image_folder

        image = plt.imread(folder + '/' + str(sample_id) + '.jpg')
        if self.transforms is not None:
            image = self.transforms(image)

        # NOTE(review): the label is returned as stored. If it is a plain list
        # of floats, DataLoader's default collate yields a list of per-position
        # tensors rather than one (batch, n_labels) tensor -- confirm that is
        # what the training loop expects.
        label = self.labels[sample_id]
        return image, label

In [40]:
# Keyword options forwarded to the DataLoader: shuffled batches of 64.
params = dict(batch_size=64, shuffle=True)
# Training split: the first 49001 IDs, with labels looked up by ID.
training_set = Dataset(list_IDs=read_label_ids[:49001], labels=read_label_values)
training_generator = data.DataLoader(training_set, **params)

In [46]:
# Inspect the first training sample only (the loop breaks after one item).
# The loop variables are deliberately NOT named `data`/`target`: `data` is the
# alias of torch.utils.data imported for the Dataset/DataLoader cells, and
# rebinding it here would make re-running `data.DataLoader(...)` fail.
for sample_image, sample_target in training_set:
    print(sample_target)
    print(len(sample_target))
    print(sample_image)
    print(sample_image.shape)
    break

37
[0.383147, 0.616853, 0.0, 0.0, 0.616853, 0.038452149, 0.578400851, 0.418397819, 0.198455181, 0.0, 0.104752126, 0.512100874, 0.0, 0.054453, 0.945547, 0.201462524, 0.181684476, 0.0, 0.0, 0.0272265, 0.0, 0.0272265, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.279952491, 0.138445328, 0.0, 0.0, 0.092885571, 0.0, 0.0, 0.0, 0.325512248]
37
[[[2 2 0]
  [4 4 2]
  [5 5 3]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[5 5 3]
  [6 6 4]
  [6 6 4]
  ...
  [2 2 2]
  [2 2 2]
  [2 2 2]]

 [[7 7 5]
  [8 8 6]
  [8 8 6]
  ...
  [2 2 2]
  [2 2 2]
  [3 3 3]]

 ...

 [[6 6 4]
  [5 5 3]
  [4 4 2]
  ...
  [4 4 2]
  [2 2 0]
  [0 0 0]]

 [[6 6 4]
  [5 5 3]
  [4 4 2]
  ...
  [2 2 0]
  [1 1 0]
  [0 0 0]]

 [[6 6 4]
  [5 5 3]
  [4 4 2]
  ...
  [1 1 0]
  [1 1 0]
  [0 0 0]]]
(424, 424, 3)


In [30]:
# Number of IDs in the training slice (a stray `r` before the closing
# parenthesis made this cell a SyntaxError).
len(read_label_ids[0:49001])

49001

In [26]:
# Read a single training image (ID 100008) to inspect what plt.imread returns.
images = plt.imread('data/images_training_rev1/100008.jpg')

UnpicklingError: invalid load key, '\xff'.

# Container type of the object plt.imread returned.
type(images)

In [24]:
# Dimensions of the loaded image array (recorded output: (424, 424, 3)).
images.shape

(424, 424, 3)

In [25]:
# Display the raw pixel values (NumPy abbreviates the repr of large arrays).
images

array([[[2, 2, 0],
        [4, 4, 2],
        [5, 5, 3],
        ...,
        [2, 2, 2],
        [2, 2, 2],
        [2, 2, 2]],

       [[5, 5, 3],
        [6, 6, 4],
        [6, 6, 4],
        ...,
        [2, 2, 2],
        [2, 2, 2],
        [2, 2, 2]],

       [[7, 7, 5],
        [8, 8, 6],
        [8, 8, 6],
        ...,
        [2, 2, 2],
        [2, 2, 2],
        [3, 3, 3]],

       ...,

       [[6, 6, 4],
        [5, 5, 3],
        [4, 4, 2],
        ...,
        [4, 4, 2],
        [2, 2, 0],
        [0, 0, 0]],

       [[6, 6, 4],
        [5, 5, 3],
        [4, 4, 2],
        ...,
        [2, 2, 0],
        [1, 1, 0],
        [0, 0, 0]],

       [[6, 6, 4],
        [5, 5, 3],
        [4, 4, 2],
        ...,
        [1, 1, 0],
        [1, 1, 0],
        [0, 0, 0]]], dtype=uint8)