In [1]:
import sys
import os
import importlib
import glob
import random
from itertools import combinations

import numpy as np
import scipy
import sklearn
import torch
from torch import nn

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns

In [2]:
sys.path.insert(1, "../../python")
sys.path.insert(1, "../../..")
import util
import plotting

In [3]:
plt.style.use("custom")  # custom style sheet
plt.style.use("muted")  # muted color theme from SciencePlots
cm_seq = sns.cubehelix_palette(
    start=0, rot=-0.70, gamma=0.40, light=0.9, dark=0.1, as_cmap=True, reverse=True
)
cm_seq2 = sns.cubehelix_palette(
    start=0, rot=-0.70, gamma=0.40, light=0.8, dark=0.1, as_cmap=True, reverse=False
)
colors = mpl.colors.to_rgba_array(
    [
        "#364B9A",
        "#4A7BB7",
        "#6EA6CD",
        "#98CAE1",
        "#C2E4EF",
        "#EAECCC",
        "#FEDA8B",
        "#FDB366",
        "#F67E4B",
        "#DD3D2D",
        "#A50026",
    ]
)
cm_div = mpl.colors.LinearSegmentedColormap.from_list("", colors)

In [4]:
%load_ext autoreload
%autoreload 2

In [6]:
import ga

In [16]:
def mlp_constructor(*model_args, **model_kwargs):
    return ga.MultiLayerNet(*model_args, **model_kwargs)

In [44]:
a = torch.optim.Adam
optimizer = a(mlp.parameters())

In [47]:
a

torch.optim.adam.Adam

In [None]:
np.mean(

In [45]:
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [37]:
mlp = ga.MultiLayerNet(3)

In [42]:
torch.optim.Optimizer

torch.optim.optimizer.Optimizer

In [31]:
# mlp = ga.MultiLayerNet()
input_dim = 3
model_args = [input_dim]
fitness_fn = torch.nn.MSELoss()
feature_list = np.arange(62)
g = ga.GeneticAlgorithm(mlp_constructor, model_args, {}, , fitness_fn, feature_list)

In [32]:
g.feature_list

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61])

In [28]:
g.fitness_fn(torch.Tensor([1, 2]), torch.Tensor([3, 4]))

tensor(4.)

In [29]:
g.fitnesses

In [49]:
g.individuals

Model: MultiLayerNet(
  (net): Sequential(
    (0): Linear(in_features=3, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=30, bias=True)
    (3): ReLU()
    (4): Linear(in_features=30, out_features=30, bias=True)
    (5): ReLU()
    (6): Linear(in_features=30, out_features=30, bias=True)
    (7): ReLU()
    (8): Linear(in_features=30, out_features=30, bias=True)
    (9): ReLU()
    (10): Linear(in_features=30, out_features=30, bias=True)
    (11): ReLU()
    (12): Linear(in_features=30, out_features=1, bias=True)
    (13): Sigmoid()
  )
)
        Model Args: [3]
        Model Kwargs: {}
        Feature set: [31 16 58]
        Training function: None


TypeError: __repr__ returned non-string (type NoneType)

In [95]:
def training_fn(model, dataloader, loss_fn, optimizer, indices=None, **kwargs):
    size = len(dataloader.dataset)
    # if indices is None:
    #     indices = np.arange(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X[..., indices])
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [96]:
training_fn(mlp, batches, fitness_fn, optimizer, indices=[1, 2, 3])

loss: 0.020032  [    0/4150115]
loss: 0.019274  [1638400/4150115]
loss: 0.017420  [3276800/4150115]


In [97]:
pred = mlp(X[0, [1,2,3]])
loss = fitness_fn(pred, y[0])

In [106]:
loss.backward()

In [110]:
optimizer.step()

In [112]:
mlp.state_dict()

OrderedDict([('net.0.weight',
              tensor([[ 0.3266,  0.1367, -0.0217],
                      [-0.4050, -0.3973, -0.3435],
                      [-0.5753,  0.3009,  0.3371],
                      [-0.0408,  0.4110,  0.6375],
                      [ 0.3711,  0.4602,  0.2713],
                      [-0.0249,  0.4882,  0.1810],
                      [-0.2101,  0.3422, -0.2819],
                      [-0.0307,  0.6222, -0.1243],
                      [ 0.0590,  0.2028,  0.0099],
                      [-0.6049,  0.2348,  0.5337],
                      [-0.4241, -0.3018,  0.4183],
                      [-0.4677,  0.0084,  0.5790],
                      [-0.5060,  0.2210, -0.3350],
                      [-0.0408, -0.1774,  0.2993],
                      [ 0.2959, -0.4763,  0.0695],
                      [ 0.2401, -0.2408, -0.5435],
                      [-0.4813,  0.3001,  0.3782],
                      [-0.0155,  0.5526,  0.4008],
                      [ 0.2101,  0.3215,  0.1659],
 

In [62]:
cv_trajs = list(
    np.load("../../data/raw_feat/cv_dist_spin_anton.npy", allow_pickle=True)
)
cv_trajs.extend(np.load("../../data/raw_feat/cv_dist_spin_anton2.npy"))

In [63]:
sb_labels = []
for r in ("R217", "R223", "R226", "R229", "R232"):
    for n in ("D129", "D136", "D151", "D164", "E183", "D186"):
        sb_labels.append(f"{r} - {n}")

In [64]:
sb_trajs = list(np.load("../../data/raw_feat/feat2_raw_anton.npy", allow_pickle=True))
sb_trajs.extend(np.load("../../data/raw_feat/feat2_raw_anton2.npy"))

In [65]:
cv_arr = np.concatenate(cv_trajs)
sb_arr = np.concatenate(sb_trajs)
print(cv_arr.shape, sb_arr.shape)

(4150115, 2) (4150115, 60)


In [66]:
# load committors
q = np.load("../../data/feat2_dist_du_anton2/qp_downup_3.npy", allow_pickle=True)[8] # 50 ns
w = np.load("../../data/feat2_dist_du_anton2/weights_3_feat5ivac.npy", allow_pickle=True)[0]

In [67]:
X = torch.Tensor(np.hstack((cv_arr, sb_arr)))
y = torch.Tensor(np.concatenate(q)).unsqueeze(-1)
print(X.shape, y.shape)

torch.Size([4150115, 62]) torch.Size([4150115, 1])


In [69]:
q_dataset = ga.CommittorDataset(X, y)
batch_size=16384
batches = torch.utils.data.DataLoader(
    q_dataset, batch_size=batch_size, shuffle=True)

In [75]:
i = ga.Individual(mlp_constructor, model_args, {}, feature_list, training_fn, torch.optim.Adam, fitness_fn)

In [77]:
i.train(batches)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16384x62 and 3x30)

In [61]:
i

Model: MultiLayerNet(
  (net): Sequential(
    (0): Linear(in_features=3, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=30, bias=True)
    (3): ReLU()
    (4): Linear(in_features=30, out_features=30, bias=True)
    (5): ReLU()
    (6): Linear(in_features=30, out_features=30, bias=True)
    (7): ReLU()
    (8): Linear(in_features=30, out_features=30, bias=True)
    (9): ReLU()
    (10): Linear(in_features=30, out_features=30, bias=True)
    (11): ReLU()
    (12): Linear(in_features=30, out_features=1, bias=True)
    (13): Sigmoid()
  )
)
Model Args: [3]
Model Kwargs: {}
Feature set: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61]
Training function: <function training_fn at 0x7f1d11340280>


TypeError: __repr__ returned non-string (type NoneType)

In [None]:
k