In [2]:
import sys
import numpy as np
from matplotlib import pyplot
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from icecream import ic
import pandas as pd

import random

ic("USING pytorch VERSION: ", torch.__version__)

ic| 'USING pytorch VERSION: ', torch.__version__: '1.6.0'


('USING pytorch VERSION: ', '1.6.0')

## Define a pytorch Dataset object to contain the training and testing data
Pytorch handles data shuffling and batch loading, as long as the user provides a "Dataset" class. This class is just a wrapper for your data that casts the data into pytorch tensor format and returns slices of the data. In this case, our data is in numpy format, which conveniently pytorch has a method for converting to their native format.

The init function takes the path to the csv and creates a dataset out of it. I actually have three different options here. The dataset could be composed such that x is the 'timestamp' of the movement,the previous set of angles, or a tuple of both.

In [6]:
class AngleDataset(Dataset):
    def __init__(self, x, y):
        x_dtype = torch.FloatTensor
        y_dtype = torch.FloatTensor     # for MSE or L1 Loss

        self.length = x.shape[0]

        self.x_data = torch.from_numpy(x).type(x_dtype)
        self.y_data = torch.from_numpy(y).type(y_dtype)

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.length

def create_datasets(csv_path: str, train_perc: float = 0.8):
    df = pd.read_csv(csv_path)
    length = len(df)

    x_data = np.array([])
    y_data = np.array([])

    # if x = curr angles and y = next angles
    for i in range(length):
        if i < length - 1:
            x = np.array(df.iloc[i])
            y = np.array(df.iloc[i + 1])
        else:
            #since it loops anyway
            x = np.array(df.iloc[i])
            y = np.array(df.iloc[0])
        
        x_data = np.append(x_data, x)
        y_data = np.append(y_data,y)

    # if x = timestamp and y = angles
    # self.x_data = np.array(range(0, len(self.df)))
    # for i in range(self.length):
    #     self.y_data.append(self.df.iloc[i])
    
    # if x = both timestamp and curr_angles
    # timestamps = range(0, len(self.df))
    # for i in range(self.length):
    #     if i < self.length - 1:
    #         x = np.array(self.df.iloc[i])
    #         y = np.array(self.df.iloc[i + 1])
    #     else:
    #         #since it loops anyway
    #         x = np.array(self.df.iloc[i])
    #         y = np.array(self.df.iloc[0])
        
    #     self.x_data.append((timestamps[i], x))
    #     self.y_data.append(y)

    
    np.random.shuffle(x_data)
    np.random.shuffle(y_data)

    last_train_idx = int(len(x_data) * train_perc)

    train_x = x_data[:last_train_idx]
    train_y = y_data[:last_train_idx]
    test_x = x_data[last_train_idx:]
    test_y = y_data[last_train_idx:]

    return AngleDataset(x=train_x, y=train_y), AngleDataset(x=test_x, y=test_y)

train_dataset, test_dataset = create_datasets('./walk_angles.csv')

train_dataset.x_data.shape

        


ic| len(x_data): 12
ic| len(x_data): 24
ic| len(x_data): 36
ic| len(x_data): 48
ic| len(x_data): 60
ic| len(x_data): 72
ic| len(x_data): 84
ic| len(x_data): 96
ic| len(x_data): 108
ic| len(x_data): 120
ic| len(x_data): 132
ic| len(x_data): 144
ic| len(x_data): 156
ic| len(x_data): 168
ic| len(x_data): 180
ic| len(x_data): 192
ic| len(x_data): 204
ic| len(x_data): 216
ic| len(x_data): 228
ic| len(x_data): 240
ic| len(x_data): 252
ic| len(x_data): 264
ic| len(x_data): 276
ic| len(x_data): 288
ic| len(x_data): 300
ic| len(x_data): 312
ic| len(x_data): 324
ic| len(x_data): 336
ic| len(x_data): 348
ic| len(x_data): 360
ic| len(x_data): 372
ic| len(x_data): 384
ic| len(x_data): 396
ic| len(x_data): 408
ic| len(x_data): 420
ic| len(x_data): 432
ic| len(x_data): 444
ic| len(x_data): 456
ic| len(x_data): 468
ic| len(x_data): 480
ic| len(x_data): 492
ic| len(x_data): 504
ic| len(x_data): 516
ic| len(x_data): 528
ic| len(x_data): 540
ic| len(x_data): 552
ic| len(x_data): 564
ic| len(x_data): 576


torch.Size([854])

## Define training methods for the model
These methods use an initialized model and training data to iteratively perform the forward and backward pass of optimization. Aside from some data reformatting that depends on the input, output, and loss function, these methods will always be the same for any shallow neural network.

In [7]:
def train_batch(model, x, y, optimizer, loss_fn):
    # Run forward calculation
    y_predict = model.forward(x)

    # Compute loss.
    loss = loss_fn(y_predict, y)

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable weights
    # of the model)
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()

    return loss.data.item()


def train(model, loader, optimizer, loss_fn, epochs=5):
    losses = list()

    batch_index = 0
    for e in range(epochs):
        for x, y in loader:
            loss = train_batch(model=model, x=x, y=y, optimizer=optimizer, loss_fn=loss_fn)
            losses.append(loss)

            batch_index += 1

        if e % 50 == 0:
          ic("Epoch: ", e+1)
          ic("Batches: ", batch_index)

    return losses


## Define testing methods for the model
These methods are like training, but we don't need to update the parameters of the model anymore because when we call the test() method, the model has already been trained. Instead, this method just calculates the predicted y values and returns them, AKA the forward pass.


In [13]:
def test_batch(model, x, y):
    # run forward calculation
    y_predict = model.forward(x)

    return y, y_predict


def test(model, loader):
    y_vectors = list()
    y_predict_vectors = list()

    batch_index = 0
    for x, y in loader:
        y, y_predict = test_batch(model=model, x=x, y=y)

        y_vectors.append(y.data.numpy())
        y_predict_vectors.append(y_predict.data.numpy())

        batch_index += 1

    y_predict_vector = np.concatenate(y_predict_vectors)

    return y_predict_vector


## Define plotting method for loss
This is a plotting method for looking at the behavior of the loss over training iterations.

In [14]:
def plot_loss(losses, show=True):
    fig = pyplot.gcf()
    fig.set_size_inches(8,6)
    ax = pyplot.axes()
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    x_loss = list(range(len(losses)))
    pyplot.plot(x_loss, losses)

    if show:
        pyplot.show()

    pyplot.close()


## Define Model Architecture
- 12 inputs = 3 joint angles per leg, 4 legs
- 12 outputs = *same as above*


In [48]:
class PytorchBrain(torch.nn.Module):
    _id = 0 # Global genome identifier
    _num_inputs = 12
    _num_outputs = 12

    # @classmethod
    # def __get_new_id(cls):
    #     cls._id += 1
    #     return cls._id

    @classmethod
    def get_num_outputs(cls):
        return cls._num_outputs

    def __init__(self):
        super().__init__()
        
        # Inputs to hidden layer linear transformation
        self.h1 = torch.nn.Linear(PytorchBrain._num_inputs, 12)
        self.relu = torch.nn.ReLU() #output is same shape as input
        self.batch_norm = torch.nn.BatchNorm1d(12)
        self.h2 = torch.nn.Linear(12, 12)
        self.h3 = torch.nn.Linear(12, 12)

        # Output layer
        self.output = torch.nn.Linear(12, PytorchBrain._num_outputs)
        
        # Define sigmoid activation
        self.sigmoid = torch.nn.Sigmoid()
        self.tanh = torch.nn.Tanh()
        
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = self.h1(x)
        x = self.relu(x)
        # x = self.batch_norm(x)
        x = self.sigmoid(x)

        x = self.h2(x)
        x = self.relu(x)
        # x = self.batch_norm(x)
        x = self.sigmoid(x)

        x = self.h3(x)
        x = self.relu(x)
        # x = self.batch_norm(x)
        x = self.sigmoid(x)

        x = self.tanh(x)
        x = self.output(x)
        
        return x

## Define Run function

In [51]:
def run(train_dataset, test_dataset, epochs=4):
    # Batch size is the number of training examples used to calculate each iteration's gradient
    batch_size_train = 12
    
    data_loader_train = DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle=True)
    data_loader_test = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), shuffle=False)
    
    # Define the hyperparameters
    learning_rate = 1e-3
    pytorch_model = PytorchBrain()
    
    # Initialize the optimizer with above parameters
    optimizer = optim.Adam(pytorch_model.parameters(), lr=learning_rate)

    # Define the loss function
    loss_fn = nn.MSELoss()  # mean squared error

    # Train and get the resulting loss per iteration
    loss = ic(train(model=pytorch_model, loader=data_loader_train, optimizer=optimizer, loss_fn=loss_fn, epochs=epochs))
    

    # Test and get the resulting predicted y values
    y_predict = ic(test(model=pytorch_model, loader=data_loader_test))

    return loss, y_predict, pytorch_model

## Run and plot results

In [52]:

ic("Train set size: ", train_dataset.length)
ic("Test set size: ", test_dataset.length)

losses, y_predict = run(train_dataset=train_dataset, test_dataset=test_dataset, epochs=400)

ic("Final loss:", sum(losses[-100:])/100)
plot_loss(losses)

# Transpose the matrices so we can plot.
y_test = test_dataset.y_data.transpose()
y_predict = y_predict.transpose()

for yt, yp in zip(y_test, y_predict):
  fig2 = pyplot.figure(dpi=300)
  fig2.set_size_inches(8,6)
  pyplot.scatter(test_dataset.x_data, yt, marker='o', s=0.2)
  pyplot.scatter(test_dataset.x_data, yp, marker='o', s=0.3)
  pyplot.text(-9, 0.44, "- Prediction", color="orange", fontsize=8)
  pyplot.text(-9, 0.48, "- Sine (with noise)", color="blue", fontsize=8)
  pyplot.show()


ic| 'Train set size: ', train_dataset.length: 854
ic| 'Test set size: ', test_dataset.length: 214


RuntimeError: size mismatch, m1: [1 x 2], m2: [12 x 12] at ..\aten\src\TH/generic/THTensorMath.cpp:41