# Featurize processed data

In [1]:
import os

# Absolute path to the repository root; it is appended to `sys.path` and used to
# build config/data paths in later cells. Set the DECISION_MAKING_REPO environment
# variable to run the notebook from another checkout; otherwise the original
# location is used unchanged.
repo_path = os.environ.get("DECISION_MAKING_REPO", "/Users/johnzhou/research/decision-making")

## Import packages

In [2]:
import sys

# Make the repository's `src` package importable. The membership check keeps the
# cell idempotent: re-running it no longer stacks duplicate `sys.path` entries.
if repo_path not in sys.path:
    sys.path.append(repo_path)

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from omegaconf import OmegaConf
from sklearn.cluster import KMeans
import torch

from src.data.experiment_data import ExperimentData
from src.features.build_features import normalize_features, remove_invalid_fits
from src.models.sigmoidnet import SigmoidNet
from src.models.train import train

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Experiment identifier; reused as the config "name" and inside the checkpoint path.
expt_name = "new_run"
# Project-level handle on the experiment's data (see src.data.experiment_data).
bigboy = ExperimentData(expt_name, repo_path)
# Debug print, currently disabled:
# print(bigboy.sigmoid_parameters.shape, bigboy.foraging_efficiency.shape, bigboy.choice_blocks.shape)
# NOTE(review): what `boundary=50000` means is not visible from this notebook —
# confirm against ExperimentData.get_valid_idxs.
valid_idxs = bigboy.get_valid_idxs(boundary=50000)
# Features and labels are both built for the same `valid_idxs`.
# Both optional feature groups are switched off (`include_feff`, `include_block`);
# presumably these are foraging efficiency and block features — TODO confirm.
a = bigboy.build_modeling_feats(include_feff=False, include_block=False, idxs=valid_idxs)
b = bigboy.build_modeling_labels(idxs=valid_idxs)

# Sanity check of the feature array's shape.
print(a.shape)

24945
(49945, 1, 3)
(49945, 1)
(49945, 1, 3)


Sigmoid fitting with MSE loss seems to be empirically more sensitive.

In [5]:
# Training configuration for this experiment. The same object is written to YAML
# below and passed to `train` further down the notebook.
config = OmegaConf.create(
    dict(
        name=expt_name,
        random_seed=4995,
        # Network shape: 3 input features feeding hidden layers of 32, 8 and 4 units.
        model=dict(
            in_features=3,
            linear_layers=[32, 8, 4],
            use_batch_norm=False,
        ),
        learning_rate=1e-4,
        # Feature/label arrays live in the experiment's data directory.
        data=dict(
            feature_path=f"{bigboy.data_path}/modeling_features.npy",
            label_path=f"{bigboy.data_path}/modeling_labels.npy",
            train_proportion=0.8,
            train_batch_size=128,
            val_batch_size=128,
        ),
        trainer=dict(
            gpus=0,
            max_epochs=1000,
        ),
    )
)

# Persist the configuration next to the other model configs in the repository.
OmegaConf.save(
    config=config,
    f=f"{repo_path}/configs/model_configs/sigmoidnet_train.yaml",
)

In [6]:
# (Re)load the TensorBoard notebook extension and open it on this experiment's
# Lightning logs. IPython substitutes the value of `bigboy.data_path` for the
# `$`-prefixed expression before the magic runs.
%reload_ext tensorboard
%tensorboard --logdir=$bigboy.data_path/lightning_logs

In [None]:
# Train SigmoidNet via the project's `train` helper. The OmegaConf object is
# converted to a plain Python container before being handed over; the experiment
# data directory and the checkpoint base name "model" are passed as keyword
# arguments. The helper returns a pair that is unpacked as (system, trainer).
system, trainer = train(
    SigmoidNet,
    OmegaConf.to_container(config),
    experiment_dir=bigboy.data_path,
    checkpoint_name="model")

Global seed set to 4995


LinearEmbedder(
  (layers): Sequential(
    (0): Linear(in_features=3, out_features=32, bias=True)
    (1): LeakyReLU(negative_slope=0.05)
    (2): Linear(in_features=32, out_features=8, bias=True)
    (3): LeakyReLU(negative_slope=0.05)
    (4): Linear(in_features=8, out_features=4, bias=True)
    (5): LeakyReLU(negative_slope=0.05)
    (6): Linear(in_features=4, out_features=2, bias=True)
  )
)


  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type           | Params
-----------------------------------------
0 | loss  | SupConLoss     | 0     
1 | model | LinearEmbedder | 438   
-----------------------------------------
438       Trainable params
0         Non-trainable params
438       Total params
0.002     Total estimated model params size (MB)


                                                                                          

  rank_zero_warn(
  rank_zero_warn(


Epoch 0:  77%|████████████████     | 300/392 [00:01<00:00, 156.77it/s, loss=4.84, v_num=1]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 0:  82%|█████████████████▏   | 320/392 [00:02<00:00, 154.21it/s, loss=4.84, v_num=1][A
Epoch 0:  87%|██████████████████▏  | 340/392 [00:02<00:00, 157.59it/s, loss=4.84, v_num=1][A
Epoch 0:  92%|███████████████████▎ | 360/392 [00:02<00:00, 160.37it/s, loss=4.84, v_num=1][A
Epoch 0:  97%|████████████████████▎| 380/392 [00:02<00:00, 162.94it/s, loss=4.84, v_num=1][A
Epoch 0: 100%|█████| 392/392 [00:02<00:00, 165.07it/s, loss=4.75, v_num=1, val_loss=4.840][A
Epoch 1:  77%|▊| 300/392 [00:01<00:00, 189.23it/s, loss=4.84, v_num=1, val_loss=4.840, tra[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 1:  82%|▊| 320/392 [00:01<00:00, 174.38it/s, loss=4.84, v_num=1, val_loss=4.840, tr

Epoch 11:  97%|▉| 380/392 [00:01<00:00, 208.08it/s, loss=4.54, v_num=1, val_loss=4.560, tr[A
Epoch 11: 100%|█| 392/392 [00:01<00:00, 210.40it/s, loss=4.47, v_num=1, val_loss=4.540, tr[A
Epoch 12:  77%|▊| 300/392 [00:01<00:00, 219.15it/s, loss=4.53, v_num=1, val_loss=4.540, tr[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 12:  82%|▊| 320/392 [00:01<00:00, 217.88it/s, loss=4.53, v_num=1, val_loss=4.540, tr[A
Epoch 12:  87%|▊| 340/392 [00:01<00:00, 222.87it/s, loss=4.53, v_num=1, val_loss=4.540, tr[A
Epoch 12:  92%|▉| 360/392 [00:01<00:00, 227.07it/s, loss=4.53, v_num=1, val_loss=4.540, tr[A
Epoch 12:  97%|▉| 380/392 [00:01<00:00, 231.51it/s, loss=4.53, v_num=1, val_loss=4.540, tr[A
Epoch 12: 100%|█| 392/392 [00:01<00:00, 232.71it/s, loss=4.46, v_num=1, val_loss=4.530, tr[A
Epoch 13:  77%|▊| 300/392 [00:01<00:00, 221.99it/s, loss=4.52, v_num=1, val_loss=4.530, tr[A
Validation: 0it [00:00, ?i

Epoch 23:  87%|▊| 340/392 [00:01<00:00, 188.85it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 23:  92%|▉| 360/392 [00:01<00:00, 195.21it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 23:  97%|▉| 380/392 [00:01<00:00, 201.53it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 23: 100%|█| 392/392 [00:01<00:00, 204.64it/s, loss=4.43, v_num=1, val_loss=4.500, tr[A
Epoch 24:  77%|▊| 300/392 [00:02<00:00, 146.93it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 24:  82%|▊| 320/392 [00:02<00:00, 149.01it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 24:  87%|▊| 340/392 [00:02<00:00, 152.88it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 24:  92%|▉| 360/392 [00:02<00:00, 156.00it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 24:  97%|▉| 380/392 [00:02<00:00, 156.73it/s, loss=4.48, v_num=1, val_loss=4.500, tr[A
Epoch 24: 100%|█| 392/392 

Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 35:  82%|▊| 320/392 [00:02<00:00, 140.52it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 35:  87%|▊| 340/392 [00:02<00:00, 143.56it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 35:  92%|▉| 360/392 [00:02<00:00, 147.73it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 35:  97%|▉| 380/392 [00:02<00:00, 151.05it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 35: 100%|█| 392/392 [00:02<00:00, 141.16it/s, loss=4.42, v_num=1, val_loss=4.490, tr[A
Epoch 36:  77%|▊| 300/392 [00:02<00:00, 134.10it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 36:  82%|▊| 320/392 [00:02<00:00, 135.52it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 36:  87%|▊| 340/392 [00:02<00:00, 139.59it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 36:  92%|▉| 360/392 

Epoch 47:  77%|▊| 300/392 [00:02<00:00, 123.47it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                  | 0/79 [00:00<?, ?it/s][A
Epoch 47:  82%|▊| 320/392 [00:02<00:00, 125.22it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 47:  87%|▊| 340/392 [00:02<00:00, 130.21it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 47:  92%|▉| 360/392 [00:02<00:00, 124.15it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 47:  97%|▉| 380/392 [00:03<00:00, 122.74it/s, loss=4.48, v_num=1, val_loss=4.490, tr[A
Epoch 47: 100%|█| 392/392 [00:03<00:00, 120.90it/s, loss=4.42, v_num=1, val_loss=4.490, tr[A
Epoch 48:  61%|▌| 240/392 [00:02<00:01, 85.96it/s, loss=4.47, v_num=1, val_loss=4.490, tra[A

In [None]:
# Paths to the saved feature array and to the trained checkpoint.
# NOTE(review): the "-v1" suffix is hard-coded — confirm it matches the
# checkpoint file the training run actually wrote.
X_fname = f"{bigboy.data_path}/modeling_features.npy"
model_fname = f"{bigboy.data_path}/{expt_name}/model-v1.ckpt"
# Kept from the original cell so `system` is still rebound here; the checkpoint
# itself is no longer loaded through this instance.
system = SigmoidNet(config)

# `load_from_checkpoint` is a classmethod, so call it on the class rather than on
# an instance (newer Lightning releases reject the instance call). `.eval()`
# returns the module itself and puts it in inference mode for the cells below.
model = SigmoidNet.load_from_checkpoint(model_fname).eval()

In [None]:
# Embed every saved feature row with the loaded model. This is inference only, so
# it runs under `torch.no_grad()` to avoid building an autograd graph for the
# whole dataset; the result is converted to a NumPy array for plotting.
with torch.no_grad():
    X_embedded_mse = torch.squeeze(
        model(torch.unsqueeze(torch.from_numpy(np.load(X_fname)).float(), 1))
    ).numpy()
print(X_embedded_mse.shape)

# Rows before this index are drawn as 'Synth MF', the remaining rows as 'Synth MB'.
# NOTE(review): hard-coded split index — confirm it still matches the data that
# was used to build the feature file.
mse_boundary = 24945

fig, ax = plt.subplots()

mf = ax.scatter(X_embedded_mse[:mse_boundary, 0], X_embedded_mse[:mse_boundary, 1], color='blue', s=20)
mb = ax.scatter(X_embedded_mse[mse_boundary:, 0], X_embedded_mse[mse_boundary:, 1], color='red', s=10)

ax.legend((mf, mb), ('Synth MF', 'Synth MB'), loc='upper right')

# Threshold on embedding dimension 0, drawn as a vertical line segment.
border = -0.25
ax.vlines(border, -.55, -.2)

# Label the figure so it can be read on its own.
ax.set_xlabel("Embedding dimension 0")
ax.set_ylabel("Embedding dimension 1")
ax.set_title("Learned embedding: Synth MF vs. Synth MB")

plt.show()

In [None]:
def _percent(count, total):
    """Return `count` as a percentage of `total`, or NaN when `total` is zero."""
    return 100.0 * count / total if total else float("nan")


# Embedding dimension that is compared against `border`.
dim = 0

# Evaluate each strict comparison once for all rows. Rows lying exactly on the
# border count as neither "over" nor "under", as before.
is_over = X_embedded_mse[:, dim] > border
is_under = X_embedded_mse[:, dim] < border

# Rows before `mse_boundary` are the MF group, the rest the MB group.
mf_over = np.sum(is_over[:mse_boundary])
mb_under = np.sum(is_under[mse_boundary:])

# The labels promise percentages, so the values are scaled to 0-100 instead of
# being printed as raw fractions.
print("% of MF that are over:", _percent(mf_over, is_over[:mse_boundary].size))
print("% of MB that are under:", _percent(mb_under, is_under[mse_boundary:].size))

print("% of over that are MF:", _percent(mf_over, np.sum(is_over)))
print("% of under that are MB:", _percent(mb_under, np.sum(is_under)))



1.0 prew