## Galaxy Zoo main notebook

### Python files required to run this notebook: data_loader.py, separate_training_validation.py, Model_All_Questions.py

In [21]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np


In [22]:
# Training settings
# Every tunable value of the notebook is declared here as a command-line style
# option so that the defaults are documented in one place.
parser = argparse.ArgumentParser(description='Galaxy zoo project')
parser.add_argument('--data', type=str, default='data', metavar='D',
                    help="folder where data is located. train_data.zip and test_data.zip need to be found in the folder")
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
# The help text previously advertised a default of 15 while the actual default
# is 10; the message now matches the value that is really used.
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')


# Passing an explicit (empty) argument list keeps argparse from reading the
# kernel's own sys.argv. Add strings such as '--epochs', '5' to override a
# default from inside the notebook.
input_args = []
args = parser.parse_args(input_args)
# Seed PyTorch's random number generator from the parsed --seed value.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x11286dad0>

In [23]:
### Data Initialization and Loading
# Both helpers are imported from the project-local data_loader module.
from data_loader import initialize_data, loader  # data_transforms is not imported
# Prepare the dataset under the folder given by --data. Per the original
# author's note this extracts the zip files and makes a validation set;
# confirm against data_loader.py.
initialize_data(args.data)

data/images_training_rev1.zip


In [24]:
from separate_training_validation import separate

# separate() returns four values that are unpacked, in order, into the
# training ids, validation ids, training label values and validation label
# values. NOTE(review): how the split is chosen is not visible here; see
# separate_training_validation.py.
label_ids_training, label_ids_validation, label_values_training, label_values_validation = separate()


In [25]:
# Loader configuration. The meaning of crop_size, resolution and questions is
# defined by data_loader.loader. NOTE(review): presumably the crop edge and the
# resized image edge in pixels, and a question selector; confirm there.
crop_size = 256
resolution = 64
# Honour the parsed --batch-size setting instead of a second hard-coded 64,
# so the value documented in the settings cell is the one actually used.
batch_size = args.batch_size
# NOTE(review): the training loader is built with shuffle disabled; confirm
# that this is intentional.
shuffle = False
questions = 0

train_loader = loader(label_ids_training, label_values_training, label_ids_validation, label_values_validation, crop_size, resolution, batch_size, shuffle, questions)

In [26]:
# Sanity check: display the type of the object returned by loader().
type(train_loader)

torch.utils.data.dataloader.DataLoader

In [27]:
# Pull a single batch from the training loader and show the shapes of its
# input and target tensors, then stop iterating.
for ix, batch in enumerate(train_loader):
    data, target = batch
    print(data.shape, target.shape, sep='\n')
    break

torch.Size([64, 3, 64, 64])
torch.Size([64, 37])


In [28]:
from Model_All_Questions import Net

In [29]:
# Instantiate the network imported from Model_All_Questions.
model = Net()
# Plain SGD over all model parameters, configured from the parsed --lr and
# --momentum settings.
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
# Mean-squared-error criterion; train() uses it to compare the model output
# with the target.
loss_fun=nn.MSELoss()

In [30]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    For each batch the gradients are cleared, the output of ``model`` is
    scored against the float-cast target with ``loss_fun``, the loss is
    back-propagated and ``optimizer`` takes one step. Every
    ``args.log_interval`` batches a progress line is printed.

    Args:
        epoch: value shown in the progress line; it is not used otherwise.

    NOTE(review): ``loss.data[0]`` is the legacy way of reading a scalar loss;
    keep it in sync with the PyTorch version this notebook targets.
    """
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = Variable(inputs)
        labels = Variable(labels).float()

        optimizer.zero_grad()
        loss = loss_fun(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only report on every log_interval-th batch.
        if step % args.log_interval != 0:
            continue
        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, loss.data[0]))



In [31]:
def validation():
    """Evaluate ``model`` on all of ``val_loader`` and report the result.

    Puts the model in eval mode, accumulates the summed ``F.nll_loss`` and the
    number of argmax predictions equal to the target over every batch, prints
    the average loss and the accuracy, and returns the accuracy.

    The normalisation, the report and the ``return`` now run once, after the
    loop has consumed every batch. They used to sit inside the loop body, so
    the function returned after the first batch only.

    Returns:
        Accuracy as a percentage of ``len(val_loader.dataset)``.

    NOTE(review): ``val_loader`` is read as a global but is not created in the
    visible cells of this notebook; it must exist before this is called.
    NOTE(review): the batch inspected earlier has targets of shape [64, 37]
    and training uses ``nn.MSELoss``. ``nll_loss`` and the argmax accuracy
    look like leftovers from a classification template; confirm that they
    are the intended metric for this data.
    """
    model.eval()
    validation_loss = 0
    correct = 0
    for data, target in val_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        validation_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    # Average over the whole dataset, once, after all batches were summed.
    validation_loss /= len(val_loader.dataset)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        validation_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))
    return 100. * correct / len(val_loader.dataset)
            

In [32]:
def trainaccuracy():
    """Score ``model`` on the training set and return its accuracy.

    With the model in eval mode, every batch of ``train_loader`` is run
    through the network; the summed ``F.nll_loss`` and the count of argmax
    predictions equal to the target are accumulated. After the loop the
    average loss and the accuracy are printed.

    Returns:
        Accuracy as a percentage of ``len(train_loader.dataset)``.

    NOTE(review): the batch inspected earlier has targets of shape [64, 37]
    and training uses ``nn.MSELoss``. ``nll_loss`` and the argmax accuracy
    look like leftovers from a classification template; confirm that they
    are the intended metric for this data.
    """
    model.eval()
    loss_total = 0
    hits = 0

    for inputs, labels in train_loader:
        inputs = Variable(inputs, volatile=True)
        labels = Variable(labels)
        scores = model(inputs)
        # Sum (not mean) of the batch loss, so it can be averaged at the end.
        batch_loss = F.nll_loss(scores, labels, size_average=False)
        loss_total += batch_loss.data[0]
        # Index of the largest output along dimension 1.
        predicted = scores.data.max(1, keepdim=True)[1]
        hits += predicted.eq(labels.data.view_as(predicted)).cpu().sum()

    n_samples = len(train_loader.dataset)
    loss_total /= n_samples
    accuracy = 100. * hits / n_samples
    print('\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        loss_total, hits, n_samples, accuracy))
    return accuracy


In [33]:
# Per-epoch history of the values returned by trainaccuracy() and validation().
accuracy_train = []
accuracy_val = []

for epoch in range(1, args.epochs + 1):
    train(epoch)
    # Evaluate on the training data first, then on the validation data.
    accuracy_train.append(trainaccuracy())
    accuracy_val.append(validation())

    # Save the weights after every epoch under model_<epoch>.pth.
    model_file = 'model_' + str(epoch) + '.pth'
    torch.save(model.state_dict(), model_file)

  del sys.path[0]




KeyboardInterrupt: 