# GloVe
Using the large abstract data encoded with the balanced title tokens.

# Imports and Setup

Common imports and standardized code for importing the relevant data, models, etc., in order to minimize copy-paste/typo errors.

Imports and colab setup

In [None]:
%%capture import_capture --no-stder
# Jupyter magic methods
# For auto-reloading when external modules are changed
%load_ext autoreload
%autoreload 2
# For showing plots inline
%matplotlib inline

# pip installs needed in Colab for arxiv_vixra_models
!pip install wandb
!pip install pytorch-lightning
!pip install unidecode
# Update sklearn
!pip uninstall scikit-learn -y
!pip install -U scikit-learn

from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option(u'float_format', '{:f}'.format)
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
import seaborn as sns
import torch
import wandb


`wandb` log in:

In [None]:
# Authenticate this session with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mgarrett361[0m (use `wandb login --relogin` to force relogin)


True

Google drive access

In [None]:
import sys

from google.colab import drive

# Mount Google Drive so the project folder is reachable from the Colab VM.
drive.mount("/content/drive", force_remount=True)
# Enter the relevant foldername
FOLDERNAME = '/content/drive/My Drive/ML/arxiv_vixra'
assert FOLDERNAME is not None, "[!] Enter the foldername."
# For importing modules stored in FOLDERNAME or a subdirectory thereof.
# Guarded so that re-running this cell does not stack duplicate entries
# onto sys.path.
if FOLDERNAME not in sys.path:
    sys.path.append(FOLDERNAME)

Mounted at /content/drive


In [None]:
import arxiv_vixra_models as avm

In [None]:
# Model class used throughout this notebook; it is instantiated in the
# hyperparameter cell via notebook_model(**model_args_dict).
notebook_model = avm.LitGloVe

Copy data to cwd for speed.

In [None]:
SUBDIR = '/data/data_splits/'
title_tokens_file_name = 'balanced_title_normalized_vocab.feather'
!cp '{FOLDERNAME + SUBDIR + title_tokens_file_name}' .
title_tokens_df = pd.read_feather(title_tokens_file_name)
with open(FOLDERNAME + SUBDIR + 'heatmap_words.txt', 'r') as f:
    heatmap_words = f.read().split()
with open(FOLDERNAME + SUBDIR + 'pca_words.txt', 'r') as f:
    pca_words =f.read().split()
with open(FOLDERNAME + SUBDIR + 'tsne_words.txt', 'r') as f:
    tsne_words = f.read().split()

Computing specs. Save the number of processors (passed as `num_workers` into the Datamodule) and the CUDA availability (used for other flags).

In [None]:
# GPU. Save availability to IS_CUDA_AVAILABLE.
gpu_info= !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
  IS_CUDA_AVAILABLE = False
else:
  print(f"GPU\n{50 * '-'}\n", gpu_info, '\n')
  IS_CUDA_AVAILABLE = True

# Memory.
from psutil import virtual_memory, cpu_count
ram_gb = virtual_memory().total / 1e9
print(f"Memory\n{50 * '-'}\n", 'Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb), '\n')

# CPU.
print(f"CPU\n{50 * '-'}\n", f'CPU Processors: {cpu_count()}')
# Determine the number of workers to use in the datamodule
NUM_PROCESSORS = cpu_count()

GPU
--------------------------------------------------
 Thu Jan 20 16:54:24 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------------------------------

In [None]:
from requests import get

# Query the local Colab sessions endpoint; the first session's name is the
# notebook file name.
_sessions = get('http://172.28.0.2:9000/api/sessions').json()
PROJECT = _sessions[0]['name']
# Strip the file extension and the URL-encoded "copy of" prefixes so the
# project name is the bare notebook title.
for _fragment in ('.ipynb', 'Kopie%20van%20', 'Copy%20of%20'):
    PROJECT = PROJECT.replace(_fragment, '')
print(PROJECT)
ENTITY = 'garrett361'

large_abstract_glove


Create the mapping from words to indices and vice-versa, recalling that 0 and 1 are reserved for padding and `<UNK>`, respectively.

In [None]:
# Build both lookup directions from the title-token vocabulary DataFrame.
# Both dicts are passed to the visualization callback at training time.
title_word_to_idx = avm.word_to_idx_dict_from_df(title_tokens_df)
title_idx_to_word = avm.idx_to_word_dict_from_df(title_tokens_df)

Load in the relevant co-occurrence matrix:

In [None]:
# Load the saved co-occurrence matrix straight from Drive; it is handed to the
# model as `co_matrix_sparse`.
# NOTE(review): the file name suggests a context window of 5 and a sparse
# tensor built with the title-token mapping — confirm against the script that
# produced it.
co_matrix = torch.load(FOLDERNAME + SUBDIR + "large_abstract_with_title_mapping_co_matrix_context_5.pt")

# Model Training

Setting hyperparameters and performing a `wandb`-synced training loop.

In [None]:
# Settings for the 'cyclic' learning-rate scheduler selected below.
cyclic_lr_scheduler_args = dict(
    base_lr=5e-5,
    max_lr=5e-2,
    step_size_up=128,
    cycle_momentum=False,
)
# Alternative plateau-scheduler settings; not referenced by model_args_dict.
plateau_lr_scheduler_args = dict(
    verbose=True,
    patience=2,
    factor=.75,
)
# Everything passed to the model constructor: data, loader options, and
# optimization settings.
model_args_dict = dict(
    co_matrix_sparse=co_matrix,
    batch_size=2 ** 21,
    num_workers=NUM_PROCESSORS,
    pin_memory=IS_CUDA_AVAILABLE,
    persistent_workers=True,
    save_models_to_wandb=True,
    embedding_dim=256,
    lr=5e-2,
    lr_scheduler='cyclic',
    lr_scheduler_args=cyclic_lr_scheduler_args,
    lr_scheduler_interval='step',
)
model = notebook_model(**model_args_dict)

Training:

In [None]:
# Callback that receives the heatmap/PCA/t-SNE word lists and plot titles.
# NOTE(review): presumably it logs these embedding visualizations to wandb
# during training — confirm in arxiv_vixra_models.
visual_embedding_callback = avm.WandbVisualEmbeddingCallback(
    model=model,
    heatmap_words=heatmap_words,
    pca_words=pca_words,
    tsne_words=tsne_words,
    word_to_idx_dict=title_word_to_idx,
    idx_to_word_dict=title_idx_to_word,
    k=5,
    heatmap_title=f'{PROJECT} Cosine Heatmap',
    pca_title=f'{PROJECT} PCA',
    tsne_title=f'{PROJECT} t-SNE',
)
trainer = Trainer(logger=WandbLogger(),
                  gpus=-1 if IS_CUDA_AVAILABLE else 0,
                  log_every_n_steps=1,
                  # 16-bit mixed precision is only requested when a GPU is
                  # present, matching the CPU fallback on the `gpus` line above.
                  precision=16 if IS_CUDA_AVAILABLE else 32,
                  profiler='simple',
                  callbacks=[visual_embedding_callback,
                             LearningRateMonitor()
                             ])
# The context manager closes the wandb run when training ends or raises.
with wandb.init(project=PROJECT) as run:
    run.name = f"lr_{model.hparams['lr']}_scheduler_{model_args_dict.get('lr_scheduler', None)}"
    trainer.fit(model)
    # Release every matplotlib figure still open once training finishes.
    plt.close("all")


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


  | Name              | Type      | Params
------------------------------------------------
0 | word_embedding    | Embedding | 6.1 M 
1 | context_embedding | Embedding | 6.1 M 
------------------------------------------------
12.3 M    Trainable params
0         Non-trainable params
12.3 M    Total params
24.619    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Saved at global step: 22
Epoch: 0
Loss: 26.875776290893555



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 44
Epoch: 1
Loss: 19.43719482421875



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 66
Epoch: 2
Loss: 10.466347694396973



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 88
Epoch: 3
Loss: 4.792421340942383



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 110
Epoch: 4
Loss: 2.157902240753174



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 132
Epoch: 5
Loss: 1.067243218421936



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 154
Epoch: 6
Loss: 0.6184804439544678



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 176
Epoch: 7
Loss: 0.4411987066268921



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 198
Epoch: 8
Loss: 0.35649731755256653



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 220
Epoch: 9
Loss: 0.31078508496284485



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 242
Epoch: 10
Loss: 0.2866342067718506



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 264
Epoch: 11
Loss: 0.276861310005188



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 286
Epoch: 12
Loss: 0.27227041125297546



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 308
Epoch: 13
Loss: 0.25793129205703735



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 330
Epoch: 14
Loss: 0.23461894690990448



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 352
Epoch: 15
Loss: 0.20680977404117584



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 374
Epoch: 16
Loss: 0.17906415462493896



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 396
Epoch: 17
Loss: 0.1546107679605484



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 418
Epoch: 18
Loss: 0.13667653501033783



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 440
Epoch: 19
Loss: 0.126242995262146



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 462
Epoch: 20
Loss: 0.11851011961698532





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 484
Epoch: 21
Loss: 0.11162988841533661





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 506
Epoch: 22
Loss: 0.10807602852582932





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 528
Epoch: 23
Loss: 0.1068190261721611





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 550
Epoch: 24
Loss: 0.10620004683732986





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 572
Epoch: 25
Loss: 0.10410559922456741





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 594
Epoch: 26
Loss: 0.10074596107006073





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 616
Epoch: 27
Loss: 0.09651343524456024





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 638
Epoch: 28
Loss: 0.09196574240922928





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 660
Epoch: 29
Loss: 0.08876827359199524





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 682
Epoch: 30
Loss: 0.08668938279151917





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 704
Epoch: 31
Loss: 0.0835772156715393





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 726
Epoch: 32
Loss: 0.07874354720115662





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 748
Epoch: 33
Loss: 0.075186587870121





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 770
Epoch: 34
Loss: 0.07360546290874481





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 792
Epoch: 35
Loss: 0.07332951575517654





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 814
Epoch: 36
Loss: 0.07320833951234818





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 836
Epoch: 37
Loss: 0.07265832275152206





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 858
Epoch: 38
Loss: 0.0716966912150383





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 880
Epoch: 39
Loss: 0.07043231278657913





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 902
Epoch: 40
Loss: 0.06932497769594193





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 924
Epoch: 41
Loss: 0.06885118037462234





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 968
Epoch: 43
Loss: 0.06816896051168442





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 990
Epoch: 44
Loss: 0.06362248212099075





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1012
Epoch: 45
Loss: 0.06050843372941017





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1034
Epoch: 46
Loss: 0.05949379503726959





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1122
Epoch: 50
Loss: 0.05924712494015694





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1144
Epoch: 51
Loss: 0.05889085680246353





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1232
Epoch: 55
Loss: 0.05775775760412216





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1254
Epoch: 56
Loss: 0.0540492981672287





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1276
Epoch: 57
Loss: 0.05207373946905136





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1298
Epoch: 58
Loss: 0.05149495601654053





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 1496
Epoch: 67
Loss: 0.051273323595523834





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1518
Epoch: 68
Loss: 0.04767809063196182





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1540
Epoch: 69
Loss: 0.04632199928164482





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1562
Epoch: 70
Loss: 0.046234261244535446





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 1760
Epoch: 79
Loss: 0.04561915248632431





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1782
Epoch: 80
Loss: 0.04317230358719826





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 1804
Epoch: 81
Loss: 0.0421956367790699





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 2024
Epoch: 91
Loss: 0.0415373258292675





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 2046
Epoch: 92
Loss: 0.039698272943496704





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 2068
Epoch: 93
Loss: 0.03924812749028206





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 2288
Epoch: 103
Loss: 0.03827163949608803





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 2310
Epoch: 104
Loss: 0.03701065108180046





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 2552
Epoch: 115
Loss: 0.03593003749847412





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 2574
Epoch: 116
Loss: 0.03497409448027611





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 2816
Epoch: 127
Loss: 0.03394034877419472





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 2838
Epoch: 128
Loss: 0.03354089334607124





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 3080
Epoch: 139
Loss: 0.0322868786752224





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 3322
Epoch: 150
Loss: 0.0319598987698555





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 3344
Epoch: 151
Loss: 0.031070876866579056





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 3586
Epoch: 162
Loss: 0.03055262751877308





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 3608
Epoch: 163
Loss: 0.030273564159870148





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 3850
Epoch: 174
Loss: 0.029354626312851906





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 4092
Epoch: 185
Loss: 0.029246926307678223





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 4114
Epoch: 186
Loss: 0.028514038771390915





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 4356
Epoch: 197
Loss: 0.028062881901860237





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 4378
Epoch: 198
Loss: 0.02797267585992813





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 4620
Epoch: 209
Loss: 0.027139835059642792





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 4862
Epoch: 220
Loss: 0.027104757726192474





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Saved at global step: 4884
Epoch: 221
Loss: 0.026581035926938057





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 5126
Epoch: 232
Loss: 0.026182495057582855





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 5390
Epoch: 244
Loss: 0.02553734742105007





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 5654
Epoch: 256
Loss: 0.025296924635767937





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 5896
Epoch: 267
Loss: 0.025068826973438263





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

Saved at global step: 6160
Epoch: 279
Loss: 0.024811388924717903





The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.




The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better conver

# Loading Best Models

In [None]:
wandb_api = wandb.Api()
notebook_runs = wandb_api.runs(ENTITY + "/" + PROJECT)
run_cats = ('best_loss', 'name', 'wandb_path', 'timestamp')
runs_sort_cat = 'best_loss'
run_state_dict_file_name = 'glove.pt'
run_init_params_file_name = 'model_init_params.pt'

notebook_runs_dict = {key: [] for key in run_cats}

for run in notebook_runs:
    run_json = run.summary._json_dict
    if runs_sort_cat in run_json:
        notebook_runs_dict[runs_sort_cat].append(run_json[runs_sort_cat])
        notebook_runs_dict['name'].append(run.name)
        notebook_runs_dict['wandb_path'].append('/'.join(run.path))
        notebook_runs_dict['timestamp'].append(run_json['_timestamp'])
# See top runs:
notebook_runs_df = pd.DataFrame(notebook_runs_dict).sort_values(by=runs_sort_cat, ascending=True).reset_index(drop=True)
best_model_wandb_path = notebook_runs_df.iloc[0]['wandb_path']
display(notebook_runs_df)
# Write state dict and init params to final models folder.
!cp "{run_state_dict_file_name}" "{FOLDERNAME + '/final_models/' + PROJECT + '_state_dict.pt'}"
!cp "{run_init_params_file_name}" "{FOLDERNAME + '/final_models/' + PROJECT + '_init_params.pt'}"
# Restore best model.
wandb.restore(run_state_dict_file_name, run_path = best_model_wandb_path, replace=True)
wandb.restore(run_init_params_file_name, run_path = best_model_wandb_path, replace=True)
best_model_state_dict = torch.load(run_state_dict_file_name)
best_model_init_params = torch.load(run_init_params_file_name)
best_model = notebook_model(**best_model_init_params)
best_model.load_state_dict(torch.load(run_state_dict_file_name))

Save the state dicts locally and rebuild the corresponding models.

In [None]:
# wandb stores None values in the config dict as a string literal. Need to
# fix these entries, annoyingly.
for key, val in best_model_df.config.items():
    if val == 'None':
        best_model_df.config[key] = None
# Write to disk
glove_file_name = f"glove_dim_{best_model_df.config['embedding_dim']}.pt"
wandb.restore(glove_file_name,
              run_path=best_model_df.wandb_path,
              replace=True)
glove_file_name_suffix = '_'.join(glove_file_name.split('_')[-2:])
# Also copy to the final_models folder
!cp '{glove_file_name}' "{FOLDERNAME + '/final_models/' + PROJECT + '_' + glove_file_name_suffix}"

In [None]:
# Rebuild the model from the stored config, supplying the co-occurrence
# matrix explicitly, then load the restored weights into it.
best_model_kwargs = {**best_model_df.config, 'co_matrix': co_matrix}
best_model = notebook_model(**best_model_kwargs)
best_model.load_state_dict(torch.load(glove_file_name))

# Visualize

In [None]:
# Heatmap for the words in heatmap_words, computed from best_model
# via avm.embedding_cosine_heatmap.
heatmap_kwargs = dict(
    model=best_model,
    words=heatmap_words,
    word_to_idx=title_word_to_idx,
)
heatmap = avm.embedding_cosine_heatmap(**heatmap_kwargs)

In [None]:
# 3D PCA plot for the words in pca_words.
# NOTE(review): k=5 is presumably the number of neighbours plotted per word;
# confirm against avm.pca_3d_embedding_plotter_topk.
pca_plot_kwargs = dict(
    model=best_model,
    words=pca_words,
    word_to_idx=title_word_to_idx,
    idx_to_word=title_idx_to_word,
    title='PCA',
    k=5,
)
pca = avm.pca_3d_embedding_plotter_topk(**pca_plot_kwargs)

In [None]:
# 3D t-SNE plot for the words in tsne_words.
# NOTE(review): k=5 is presumably the number of neighbours plotted per word;
# confirm against avm.tsne_3d_embedding_plotter_topk.
tsne_plot_kwargs = dict(
    model=best_model,
    words=tsne_words,
    word_to_idx=title_word_to_idx,
    idx_to_word=title_idx_to_word,
    title='t-SNE',
    k=5,
)
tsne = avm.tsne_3d_embedding_plotter_topk(**tsne_plot_kwargs)

In [None]:
# Show the object returned by avm.pca_3d_embedding_plotter_topk.
# NOTE(review): presumably an interactive figure; confirm its type.
pca.show()

In [None]:
# Show the object returned by avm.tsne_3d_embedding_plotter_topk.
# NOTE(review): presumably an interactive figure; confirm its type.
tsne.show()

In [None]:
# Analogy query on the best model's embeddings; the three words are passed
# in this order to avm.embedding_utils.topk_analogies_df.
analogy_words = ['newton', 'mechanics', 'heisenberg']
avm.embedding_utils.topk_analogies_df(
    best_model,
    analogy_words,
    title_word_to_idx,
    title_idx_to_word,
)