In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the local wxml package used below) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from math import sin
from functools import reduce

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.nn import ModuleList as mdl
import torch.optim as optim
import torch.nn.functional as F
import tqdm

from wxml.mlp import MLP
from wxml.train import train
from wxml.evaluate import evaluate
from wxml.data import make_loader

In [3]:
# functions to test
def parity(x):
    """Return ``x`` modulo 2.

    Relies only on the ``%`` operator, so it accepts plain numbers as well as
    array-likes that implement element-wise ``%``.
    """
    remainder = x % 2
    return remainder


def compose(fs):
    """Compose callables right-to-left into a single callable.

    ``compose([f, g, h])(*a, **kw)`` evaluates ``f(g(h(*a, **kw)))``: the last
    callable receives the call's arguments, and every earlier one receives the
    previous result as its only positional argument.

    Args:
        fs: Iterable of callables, outermost first.

    Returns:
        The composed callable. A single-element ``fs`` returns that element
        itself. An empty ``fs`` returns the identity function; without this
        guard ``reduce`` raises ``TypeError`` because it has no initial value.
    """
    def compose2(f, g):
        # f and g are parameters, so each lambda closes over its own pair.
        return lambda *a, **kw: f(g(*a, **kw))

    # Materialise once so generators can be checked for emptiness and reduced.
    funcs = list(fs)
    if not funcs:
        return lambda x: x
    return reduce(compose2, funcs)


def make_xs(n):
    """Draw an ``(n, 2)`` array of random integers in ``[0, 10)``.

    Two columns are produced so a caller can treat column 0 as the inputs to a
    function of choice and use column 1 as a slot for that function's outputs.
    Values come from NumPy's global random state.
    """
    shape = (n, 2)
    return np.random.randint(low=0, high=10, size=shape)


def make_data(n, f, dtype=None):
    """Build ``n`` random inputs and the targets ``f(inputs)``.

    Args:
        n: Number of samples to generate.
        f: Function applied to the whole 1-D input array in one call. Its
            result must be broadcastable to the input's shape.
        dtype: Optional NumPy dtype. When given, inputs are cast to it and the
            targets are stored in it. When ``None``, inputs keep the dtype
            produced by ``make_xs`` and targets use the promotion of the input
            dtype and ``f``'s output dtype.

    Returns:
        Tuple ``(xs, ys)`` of 1-D arrays of length ``n``.
    """
    grid = make_xs(n)
    if dtype is not None:
        grid = grid.astype(dtype)
    xs = grid[:, 0]

    out = np.asarray(f(xs))
    # Writing f's output back into the integer input array silently truncates
    # float results (e.g. np.sin with dtype=None), so the targets get their own
    # array whose dtype is wide enough to hold them.
    if dtype is not None:
        target_dtype = xs.dtype
    else:
        target_dtype = np.result_type(xs.dtype, out.dtype)
    ys = np.empty(xs.shape, dtype=target_dtype)
    # Slice assignment keeps the broadcast and shape-mismatch behaviour of an
    # in-place column write.
    ys[...] = out
    return xs, ys


# TODO: add batches
def make_data_parity(n):
    """Generate train and test splits for the parity task.

    The training split has ``n`` samples and the test split ``n // 10``; the
    training split is drawn first.

    Returns:
        Tuple ``(xs_train, ys_train, xs_test, ys_test)``.
    """
    train_split = make_data(n, parity)
    test_split = make_data(n // 10, parity)
    return (*train_split, *test_split)


def make_data_sin(n):
    """Generate train and test splits for the sine task as ``float32`` arrays.

    The training split has ``n`` samples and the test split ``n // 10``; the
    training split is drawn first.

    Returns:
        Tuple ``(xs_train, ys_train, xs_test, ys_test)``.
    """
    train_split = make_data(n, np.sin, dtype=np.float32)
    test_split = make_data(n // 10, np.sin, dtype=np.float32)
    return (*train_split, *test_split)



def euclidean_distance(x, y):
    """Return the square root of the summed squared differences of ``x`` and ``y``.

    The sum runs over every element of ``x - y`` (after any broadcasting the
    subtraction performs), so the result is a single scalar tensor.
    """
    diff = x - y
    sum_of_squares = torch.sum(diff ** 2)
    return torch.sqrt(sum_of_squares)


def averager(f):
    """Wrap a two-argument function so its result is divided by ``len(x)``.

    Args:
        f: Callable taking ``(x, y)``.

    Returns:
        A callable ``(x, y) -> f(x, y) / len(x)``.
    """
    def averaged(x, y):
        total = f(x, y)
        return total / len(x)

    return averaged


# Training-set size for both tasks; each test split gets n // 10 samples.
n = 1000

# Parity data is generated before sine data; both draw from NumPy's global
# random state, so the order determines which samples each task receives.
(xs_train_parity, ys_train_parity,
 xs_test_parity, ys_test_parity) = make_data_parity(n)
(xs_train_sin, ys_train_sin,
 xs_test_sin, ys_test_sin) = make_data_sin(n)

# Preview the first five (input, target) pairs of each training split.
print("parity:", list(zip(xs_train_parity[:5], ys_train_parity[:5])))
print("sin:", list(zip(xs_train_sin[:5], ys_train_sin[:5])))

parity: [(np.int64(9), np.int64(1)), (np.int64(3), np.int64(1)), (np.int64(2), np.int64(0)), (np.int64(7), np.int64(1)), (np.int64(2), np.int64(0))]
sin: [(np.float32(8.0), np.float32(0.98935825)), (np.float32(7.0), np.float32(0.6569866)), (np.float32(2.0), np.float32(0.9092974)), (np.float32(1.0), np.float32(0.841471)), (np.float32(9.0), np.float32(0.41211846))]


In [14]:
# Use Apple's MPS backend when it is available and fall back to the CPU
# otherwise, so the notebook also runs on machines without MPS.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
dtype = torch.float32

# Learning rate for the SGD optimizer below.
lr = 1e-3

# Constructor arguments forwarded to MLP: scalar input, scalar output.
num_layers = 2
input_dim = 1
output_dim = 1
hidden_dim = 10

model = MLP(num_layers, input_dim, hidden_dim, output_dim).to(device)
# NOTE(review): `modek` looks like a typo and is not used in the cells visible
# here; the name is kept in case a later cell refers to it. On a fresh kernel
# `round` is still the builtin at this point, because the notebook's own
# `round` is defined in a later cell -- confirm which one is intended.
modek = compose([round, model])
opt = optim.SGD(model.parameters(), lr=lr)
# Third positional argument is presumably the batch size (a later cell passes
# batch_size=32 by keyword) -- confirm against wxml.data.make_loader.
loader_parity = make_loader(xs_train_parity, ys_train_parity, 32)

In [15]:
# Number of epochs passed to train().
epochs = 100

# NOTE(review): loader_parity is passed twice. The log prints both train_* and
# val_* metrics, so the second loader is presumably the validation loader, in
# which case validation is measured on the training data -- confirm against
# wxml.train.train's signature and consider a loader built from the test split.
# NOTE(review): train() is the last expression of the cell, so its return value
# (a dict containing 'train_losses') is echoed in full below the progress log.
train(model, averager(euclidean_distance), opt, loader_parity, loader_parity, epochs=epochs, device=device, dtype=dtype)

  0%|          | 0/100 [00:00<?, ?it/s]

epoch=0 | train_loss=0.9366 | val_loss=0.6708 | train_acy=0.5124 | val_acy=0.5355
epoch=1 | train_loss=0.5994 | val_loss=0.5624 | train_acy=0.5347 | val_acy=0.5323
epoch=2 | train_loss=0.5558 | val_loss=0.5447 | train_acy=0.5191 | val_acy=0.5187
epoch=3 | train_loss=0.5467 | val_loss=0.5482 | train_acy=0.5168 | val_acy=0.5194
epoch=4 | train_loss=0.5461 | val_loss=0.5432 | train_acy=0.5173 | val_acy=0.5147
epoch=5 | train_loss=0.5425 | val_loss=0.5408 | train_acy=0.5147 | val_acy=0.5228
epoch=6 | train_loss=0.5399 | val_loss=0.5368 | train_acy=0.5173 | val_acy=0.5155
epoch=7 | train_loss=0.5385 | val_loss=0.5363 | train_acy=0.5121 | val_acy=0.5202
epoch=8 | train_loss=0.5347 | val_loss=0.5314 | train_acy=0.5152 | val_acy=0.5161
epoch=9 | train_loss=0.5348 | val_loss=0.5321 | train_acy=0.5134 | val_acy=0.5147
epoch=10 | train_loss=0.5312 | val_loss=0.5304 | train_acy=0.5178 | val_acy=0.5135
epoch=11 | train_loss=0.5308 | val_loss=0.5304 | train_acy=0.5205 | val_acy=0.5218
epoch=12 | tra

{'train_losses': [0.9365525301545858,
  0.5993838310241699,
  0.5558450566604733,
  0.5466923881322145,
  0.5460747946053743,
  0.5424643792212009,
  0.5399085534736514,
  0.5384898567572236,
  0.534716391004622,
  0.5348157649859786,
  0.5311844972893596,
  0.5308430036529899,
  0.5298176240175962,
  0.5274962345138192,
  0.5251541621983051,
  0.5241602621972561,
  0.5225091716274619,
  0.5233238907530904,
  0.5218725949525833,
  0.5196460587903857,
  0.5157944951206446,
  0.517207307741046,
  0.5155486492440104,
  0.5157773494720459,
  0.5151094617322087,
  0.5149489874020219,
  0.5148283140733838,
  0.5125749679282308,
  0.5125298630446196,
  0.5116705372929573,
  0.5110133187845349,
  0.5108009446412325,
  0.509024390950799,
  0.5077102854847908,
  0.5096689611673355,
  0.5091968029737473,
  0.508889283053577,
  0.5083842538297176,
  0.507780771702528,
  0.5087563283741474,
  0.5064795035868883,
  0.5065734032541513,
  0.5049739675596356,
  0.5059308689087629,
  0.5053091458976269,

In [19]:
def round(x):
    """Threshold ``x`` at 0.5: return 1 when ``x >= 0.5``, otherwise 0.

    Note that this module-level definition shadows Python's builtin ``round``
    for every cell executed after it.
    """
    return 1 if x >= 0.5 else 0

# Show the first five test inputs and their parity targets.
print(xs_test_parity[:5], ys_test_parity[:5])
# Convert test data into DataLoader
loader_test_parity = make_loader(xs_test_parity, ys_test_parity, batch_size=32)

# Evaluate the model
# NOTE(review): the recorded run of this cell ends in
# "RuntimeError: Tensor for argument input is on cpu but expected on mps".
# Unlike the train() call, no dtype is passed here; whether evaluate() moves
# each batch to `device` cannot be seen from this notebook -- check
# wxml.evaluate.evaluate before relying on this cell.
evaluate(model, averager(euclidean_distance), loader_test_parity, device=device)


[2 9 4 3 0] [0 1 0 1 0]


RuntimeError: Tensor for argument input is on cpu but expected on mps