## Galaxy Zoo main notebook

### Python files required to run this notebook: data_loader.py, YLabelCreate.py, question_wise_model.py

In [1]:
# Imports

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Training settings
# Every hyper-parameter is declared as an argparse option; the notebook then
# parses an empty argument list so that each option takes its default value.
parser = argparse.ArgumentParser(description='Galaxy zoo project')
parser.add_argument('--data', type=str, default='data', metavar='D',
                    help="folder where data is located. train_data.zip and test_data.zip need to be found in the folder")
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--epochs', type=int, default=30, metavar='N',
                    help='number of epochs to train (default: 30)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')


# Pass an explicit empty list: parse_args() without arguments would read
# sys.argv, which inside a notebook holds the kernel's own launch flags.
input_args = []
args = parser.parse_args(input_args)
# Seed PyTorch's RNG so weight initialisation is reproducible across runs.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x2c091c2af50>

In [3]:
### Data Initialization and Loading
# initialize_data and loader are defined in data_loader.py, which must sit in
# the same folder as this notebook.
from data_loader import initialize_data, loader
# Prepare the dataset stored under args.data (per the original note: extracts
# the zip files and makes a validation set).
initialize_data(args.data)

data/images_training_rev1.zip


In [4]:
# Number of samples requested for each split; both values are passed to getYlabel.
number_of_training_points = 5000
number_of_validation_points = 1000

# getYlabel is defined in YLabelCreate.py.
from YLabelCreate import getYlabel

# The call returns four values, unpacked as (ids, ids, values, values) for the
# training and validation splits in that order.
# NOTE(review): presumably the ids are galaxy image identifiers and the values
# are the regression targets for each image - confirm in YLabelCreate.py.
label_ids_training, label_ids_validation, label_values_training, label_values_validation = getYlabel(number_of_training_points, number_of_validation_points)


In [5]:
# Settings forwarded, in this order, to loader() below.
crop_size = 256  # NOTE(review): presumably the crop side length in pixels - confirm in data_loader.py
resolution = 64  # NOTE(review): presumably the final image side length fed to the network - confirm
batch_size = 64  # NOTE(review): hard-coded; the parsed --batch-size option (args.batch_size) is never used
shuffle = False  # NOTE(review): one flag is passed to the call that builds both loaders - confirm training data should not be shuffled
questions = 1  # also passed to Net(questions) when the model is built

# loader() returns the training and validation DataLoaders as a pair.
train_loader, validation_loader = loader(label_ids_training, label_values_training, label_ids_validation, label_values_validation, crop_size, resolution, batch_size, shuffle, questions)

In [6]:
# Sanity check: display the type of the object returned by loader().
type(train_loader)

torch.utils.data.dataloader.DataLoader

In [7]:
# Peek at the first training batch only, to confirm the shapes of the input
# and target tensors that the model and the loss will receive.
for data, target in train_loader:
    print(data.shape)
    print(target.shape)
    break

torch.Size([64, 3, 64, 64])
torch.Size([64, 3])


In [8]:
### Neural Network and Optimizer
# The network is defined in question_wise_model.py (imported below) rather than
# inline, so that it can be reused outside this notebook (the original note
# mentions an evaluate.py script).
from question_wise_model import Net
model = Net(questions)

# Adam is configured with the learning rate only.
# NOTE(review): the parsed --momentum option is not used by this optimizer.
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# Training uses MSELoss with its default reduction; validation uses
# reduction='sum' so that batch losses can be added up and normalised once
# by the caller.
loss_train = nn.MSELoss()
loss_validation = nn.MSELoss(reduction='sum')

In [9]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and log progress.

    Uses the notebook-level globals ``model``, ``train_loader``,
    ``optimizer``, ``loss_train`` and ``args`` defined in earlier cells.

    Args:
        epoch: Epoch number; only used in the progress log line.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors carry autograd state themselves since PyTorch 0.4, so the
        # deprecated Variable wrapper is unnecessary; only the dtype cast of
        # the targets is kept.
        target = target.float()
        optimizer.zero_grad()
        output = model(data)
        loss = loss_train(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            # The loss is a 0-dim tensor: .item() extracts the Python float,
            # whereas indexing it (loss.data[0]) raises on current PyTorch.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [10]:
def validation():
    """Evaluate ``model`` on ``validation_loader`` and print the average loss.

    Uses the notebook-level globals ``model``, ``validation_loader`` and
    ``loss_validation`` defined in earlier cells. The per-batch values of
    ``loss_validation`` are added up and divided by the number of samples
    in the validation dataset.

    Returns:
        float: the accumulated loss divided by
        ``len(validation_loader.dataset)``.
    """
    model.eval()
    validation_loss = 0.0
    # no_grad() replaces the removed Variable(volatile=True) flag: no autograd
    # graph is built during evaluation.
    with torch.no_grad():
        for data, target in validation_loader:
            output = model(data)
            # .item() keeps the running total a plain float, not a tensor.
            validation_loss += loss_validation(output, target.float()).item()

    validation_loss /= len(validation_loader.dataset)
    print('\nValidation set: Average loss:' + str(validation_loss))
    return validation_loss

In [None]:
# Main loop: one training pass followed by one validation pass per epoch.
# Epochs are numbered from 1 so that the training log starts at "Train Epoch: 1".
for epoch in range(1, args.epochs + 1):
    train(epoch)
    validation()

  del sys.path[0]




In [None]:
# Save only the learned parameters (the state_dict), not the model object.
# The path is relative, so the file lands in the current working directory.
model_file = 'model_question_wise.pth'
torch.save(model.state_dict(), model_file)