In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch
from torch import nn, optim
from torch.utils import data

In [3]:
# load in data from Problem Set 5
# load in data from pset5
tr_file  = "ap17_xpcont_train.pickle"
val_file = "ap17_xpcont_validation.pickle"

with open(tr_file, 'rb') as f1:
    data_tr = pickle.load(f1)

with open(val_file, 'rb') as f2:
    data_val = pickle.load(f2)
    
print("Keys of training set and validation set are:")
print(data_tr.keys(), "\n", data_val.keys())

print("array shape of traing set and validation set are:")
print([data_tr[_].shape for _ in data_tr.keys()], "\n", [data_val[_].shape for _ in data_val.keys()])

Keys of training set and validation set are:
dict_keys(['bp_coef', 'rp_coef', 'labels']) 
 dict_keys(['bp_coef', 'rp_coef', 'labels'])
array shape of traing set and validation set are:
[(4000, 55), (4000, 55), (4000, 2)] 
 [(1000, 55), (1000, 55), (1000, 2)]


In [4]:
bp_tr, rp_tr = data_tr['bp_coef'], data_tr['rp_coef']
teff_tr = data_tr['labels'][:,0]

nstars = len(teff_tr)

In [5]:
# labels = effective temperature
Teffs = data_tr['labels'][:,0]

In [6]:
# construct our features, using the first 8-9 coefficients from BPs and RPs, and dividing out by the first RP term
BPs = np.zeros((nstars, 8))
RPs = np.zeros((nstars, 9))
for idx in range(nstars):
    BPs[idx] = data_tr['bp_coef'][idx,:8] / data_tr['rp_coef'][idx,0]
    RPs[idx] = data_tr['rp_coef'][idx,:9] / data_tr['rp_coef'][idx,0]
BPs = np.insert(BPs, 0, 1., axis=1)
print(BPs.shape, RPs.shape)

(4000, 9) (4000, 9)


I'm going to try to use pytorch to implement a neural network to learn the temperature labels from the BP and RP spectral coefficients:

In [7]:
features_bp = torch.HalfTensor(BPs.T)

For now the notebook kernel restarts automatically after trying to create a PyTorch tensor with the BP/RP coefficients as in the above cell—I think it's a memory allocation issue (?). I'm going ahead and submitting what I have but will continue to try to figure this out.

A smaller, example linear regression with pytorch to make sure I understand the process:

This is copied almost directly from https://www.docker.com/blog/how-to-train-and-deploy-a-linear-regression-model-using-pytorch-part-1/

In [3]:
# create a fake data set
def synthetic_data(m, c, num_examples):
    """Generate y = mX + bias(c) + noise"""
    X = torch.normal(0, 1, (num_examples, len(m)))
    y = torch.matmul(X, m) + c
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

In [4]:
true_m = torch.tensor([2, -3.4])
true_c = 4.2
features, labels = synthetic_data(true_m, true_c, 1000)

In [5]:
features.shape, labels.shape

(torch.Size([1000, 2]), torch.Size([1000, 1]))

In [6]:
# read the data set and create a small batch
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator."""
    dataset = data.TensorDataset(*data_arrays)  # TensorDataset() stores the samples and their corresponding labels
    return data.DataLoader(dataset, batch_size, shuffle=is_train)   # DataLoader() wraps an iterator around the TensorDataset for easier access

In [7]:
batch_size = 10
data_iter = load_array((features, labels), batch_size)
next(iter(data_iter))       # iter() creates a Python iterator, while next() obtains the first item from that iterator

[tensor([[ 0.7861, -0.6197],
         [-0.9774,  0.3925],
         [-0.3230,  1.0095],
         [-0.2083,  0.6822],
         [-3.6081, -0.4284],
         [ 0.5964, -0.1668],
         [ 3.7271,  0.3294],
         [-1.7750, -0.3569],
         [-1.4828, -0.7950],
         [-0.2975, -1.2618]]),
 tensor([[ 7.8860],
         [ 0.9126],
         [ 0.1178],
         [ 1.4621],
         [-1.5677],
         [ 5.9650],
         [10.5411],
         [ 1.8576],
         [ 3.9408],
         [ 7.8888]])]

In [8]:
# define a model: initialization
# create a single-layer feed-forward network with 2 inputs and 1 output
net = nn.Linear(2, 1)
# initialize model params
net.weight.data.normal_(0, 0.01)    # initial weights are Gaussian normal distribution with mu=0, sigma=0.01
net.bias.data.fill_(0)              # bias is set simply to zero

tensor([0.])

In [9]:
# define the loss function: RMS
loss = nn.MSELoss()

In [10]:
# define an optimization algorithm: stochastic gradient descent
trainer = torch.optim.SGD(net.parameters(), lr=0.03)            # lr = learning rate; this determines the update step during training

TRAINING

For each minibatch:
- Compute predictions and calculate the loss
- Calculate gradients by running the backpropagation
- Update the model parameters
- Compute the loss after each epoch

In [11]:
nepochs = 5
for epoch in range(nepochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()     # sets gradients to zero
        l.backward()            # backpropagation
        trainer.step()          # parameter update
        l = loss(net(features), labels)
        print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 28.256584
epoch 1, loss 23.767817
epoch 1, loss 21.012089
epoch 1, loss 19.068922
epoch 1, loss 17.176157
epoch 1, loss 15.164724
epoch 1, loss 12.978922
epoch 1, loss 11.124973
epoch 1, loss 8.468372
epoch 1, loss 7.400248
epoch 1, loss 6.494429
epoch 1, loss 6.079139
epoch 1, loss 5.574371
epoch 1, loss 4.859051
epoch 1, loss 4.308041
epoch 1, loss 3.731048
epoch 1, loss 3.390696
epoch 1, loss 3.044938
epoch 1, loss 2.736085
epoch 1, loss 2.508322
epoch 1, loss 2.318806
epoch 1, loss 2.107263
epoch 1, loss 1.941071
epoch 1, loss 1.748525
epoch 1, loss 1.498306
epoch 1, loss 1.416942
epoch 1, loss 1.060823
epoch 1, loss 0.923746
epoch 1, loss 0.813718
epoch 1, loss 0.747465
epoch 1, loss 0.678934
epoch 1, loss 0.608004
epoch 1, loss 0.499118
epoch 1, loss 0.402225
epoch 1, loss 0.377671
epoch 1, loss 0.326749
epoch 1, loss 0.286112
epoch 1, loss 0.243903
epoch 1, loss 0.205048
epoch 1, loss 0.188293
epoch 1, loss 0.150663
epoch 1, loss 0.131049
epoch 1, loss 0.112258
epo

In [12]:
# results: compute errors by comparing the true value with the trained model parameters
m = net.weight.data
print('error in estimating m:', true_m - m.reshape(true_m.shape))
c = net.bias.data
print('error in estimating c:', true_c - c)

error in estimating m: tensor([ 0.0008, -0.0004])
error in estimating c: tensor([-1.5736e-05])
