In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

import generate as generate
from single_layer import *

## Experiment 1
- Data generation: $\mathbb{R}^1 \to \mathbb{R}^1$, with only one activation unit
- Model
    - hidden_dim: [20]
    - lr: 0.01

In [13]:
# Constants
# -- Synthetic-data settings ------------------------------------------------
# M, d and num are the arguments of generate.generate_single_layer_v2;
# the generated X is shaped (num, T, d).
d, M, num = 8, 4, 1
T = 32              # samples requested from generate.generate_single_data_v2
N_test = 102_400    # points generated for the held-out evaluation set
noise = 0.1         # forwarded to generate.add_noise and generate.kl_divergence

# -- Training hyper-parameters forwarded to train_one_model -------------------
lr = 1e-2
hidden_dim = [32] * 3
dropout = 0.5
weight_decay = 1e-3

In [14]:
# Build a target via generate.generate_single_layer_v2, sample T points from
# it, and pass the clean responses through generate.add_noise.
thetan, an, bn = generate.generate_single_layer_v2(M, d, num)
X, Y_noiseless = generate.generate_single_data_v2(T, an, bn, thetan)
Y = generate.add_noise(Y_noiseless, noise)

# Shape check: inputs first, then targets, one per line.
print(X.shape, Y.shape, sep="\n")

(1, 32, 8)
(1, 32)


In [17]:
# Repeated-trial sweep. For every training-set size T, run `num_experiments`
# trials; each trial regenerates the target and its noisy training sample,
# fits a network with train_one_model, and scores the fitted network on
# N_test freshly generated noiseless points.
#
# One tab-separated row is printed per trial:
#   d, M, T, noise, KL divergence, hidden_dim, dropout, weight_decay
num_experiments = 64
hidden_dim = [64, 64, 64]  # overrides the value from the constants cell
for T in [8192]:  # NOTE: rebinds the notebook-level T
    for _ in range(num_experiments):
        (thetan, an, bn) = generate.generate_single_layer_v2(M, d, num)
        (X, Y_noiseless) = generate.generate_single_data_v2(T, an, bn, thetan)
        Y = generate.add_noise(Y_noiseless, noise)
        # X and Y carry a leading axis of length `num`; train on slice 0.
        (model, epoch_number, best_vloss, train_loss) = train_one_model(
            hidden_dim, X[0], Y[0],
            val_ratio=0.2,
            lr=lr,
            weight_decay=weight_decay,
            dropout=dropout,
            batch_size=4096,
            patience=20,
            epochs=500,
            verbose=False,
        )
        # Evaluation mode so the dropout layer is inactive at test time.
        model.eval()
        (X_test, Y_test) = generate.generate_single_data_v2(N_test, an, bn, thetan)
        # Inference only: skip autograd bookkeeping for the N_test-point batch.
        with torch.no_grad():
            predicted = model(torch.Tensor(X_test)).numpy()
        kl_divergence = generate.kl_divergence(Y_test, predicted.reshape(-1), noise)
        print(f"{d}\t{M}\t{T}\t{noise}\t{kl_divergence[0]}\t{hidden_dim}\t{dropout}\t{weight_decay}")
        # Baseline idea kept from an earlier draft: generate.kl_divergence(Y_test, 0, noise)
        # (presumably the score of a constant-zero predictor) can be used to
        # normalise kl_divergence.


In [None]:
# Shell escape: run the external `notify.sh` script (not part of this notebook)
# with the message 'training finished' once the preceding cell has completed.
!notify.sh 'training finished'

{"ok":true,"result":{"message_id":208,"from":{"id":883542720,"is_bot":true,"first_name":"\u5c0f\u52a9\u624b","username":"MMMyNotificationBot"},"chat":{"id":1172039795,"first_name":"\u534a\u4ed9","type":"private"},"date":1649110942,"text":"training finished"}}

In [11]:
# Generate one large noisy training set (2**19 samples).
# NOTE: this rebinds the notebook-level T, M and d, and replaces the
# thetan/an/bn/X/Y left behind by earlier cells.
T = 2**19
M = 4
d = 8
(thetan, an, bn) = generate.generate_single_layer_v2(M, d, num)
(X, Y_noiseless) = generate.generate_single_data_v2(T, an, bn, thetan)
Y = generate.add_noise(Y_noiseless, noise)
# Shapes of the inputs and noisy targets, printed on one line.
print(X.shape, Y.shape)

(1, 524288, 8) (1, 524288)


In [12]:
# Fit a single wide network (three hidden layers of 128 units) on slice 0 of
# the current X / Y, with per-epoch logging enabled, then score it on N_test
# freshly generated noiseless points.
(model, epoch_number, best_vloss, train_loss) = train_one_model(
    [128, 128, 128], X[0], Y[0],
    val_ratio=0.2,
    lr=lr,
    weight_decay=0.001,
    dropout=0.5,
    batch_size=1024,
    patience=40,
    epochs=1000,
    verbose=True,
)
# Evaluation mode so the dropout layer is inactive at test time.
model.eval()
(X_test, Y_test) = generate.generate_single_data_v2(N_test, an, bn, thetan)
# Inference only: skip autograd bookkeeping for the N_test-point batch.
with torch.no_grad():
    predicted = model(torch.Tensor(X_test)).numpy()
# Reference values first, predictions second: the same argument order used by
# the multi-trial sweep cell and by the zero-predictor baseline call
# generate.kl_divergence(Y_test, 0, noise).
kl_divergence = generate.kl_divergence(Y_test, predicted.reshape(-1), noise)
print(kl_divergence[0])


EPOCH 1:
LOSS train 0.5092933320417636 valid 0.1130468025803566
EPOCH 2:
LOSS train 0.27473637258134237 valid 0.10431107133626938
EPOCH 3:
LOSS train 0.2522524456788854 valid 0.1119113564491272
EPOCH 4:
LOSS train 0.2317112789648335 valid 0.07538247108459473
EPOCH 5:
LOSS train 0.21920956682141235 valid 0.08208274841308594
EPOCH 6:
LOSS train 0.21737110374904262 valid 0.08745074272155762
EPOCH 7:
LOSS train 0.21492387212631178 valid 0.0676506906747818
EPOCH 8:
LOSS train 0.21028356359499256 valid 0.07523845136165619
EPOCH 9:
LOSS train 0.21345358123866523 valid 0.08272461593151093
EPOCH 10:
LOSS train 0.21048184215295607 valid 0.062370415776968
EPOCH 11:
LOSS train 0.20967752849910318 valid 0.05488964170217514
EPOCH 12:
LOSS train 0.2138896323922204 valid 0.06744629889726639
EPOCH 13:
LOSS train 0.21120101111691172 valid 0.07002749294042587
EPOCH 14:
LOSS train 0.21120364302542152 valid 0.07936234027147293
EPOCH 15:
LOSS train 0.20866841095976713 valid 0.06818018853664398
EPOCH 16:
LOS

KeyboardInterrupt: 

In [None]:
# Display the module repr of the most recently trained model to inspect its
# layer stack.
model

MultiLayer(
  (fcs): ModuleList(
    (0): Linear(in_features=8, out_features=128, bias=True)
    (1): Linear(in_features=128, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): Linear(in_features=128, out_features=1, bias=True)
  )
  (dropout): Dropout(p=0.5, inplace=False)
)

In [10]:
# Total number of scalar entries across all parameter tensors of `model`.
sum(map(torch.numel, model.parameters()))

34305

In [None]:
# NOTE(review): unfinished scratch expression; as written this cell is a
# SyntaxError and will stop a Restart & Run All. Complete it or delete the cell.
6 / 