In [26]:
import os, sys

# Choose where the project-local modules live from the host's node name:
# hosts whose name contains "adroit" use the home-directory checkout, every
# other host falls back to the Dropbox checkout path.
if "adroit" in os.uname()[1]:
    CUSTOM_MODULE_PATH = "/home/jylin/thesis/modules"
else:
    CUSTOM_MODULE_PATH = "/System/Volumes/Data/Users/jesselin/Dropbox/src/thesis/modules"
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if CUSTOM_MODULE_PATH not in sys.path:
    sys.path.append(CUSTOM_MODULE_PATH)

# custom libraries
from entropy import get_entropy
from my_funcs import *

# usual libraries
import glob
from collections import defaultdict
import argparse
import functools
import time
from tqdm import trange, tqdm
from datetime import datetime

# scientific libraries
import numpy as np

# ML libraries
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, Dataset, random_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

# Run-level settings. They are expected to be supplied from the command line,
# so each of them starts out unset (None) until something assigns it.
DATA_DIR = RES_DIR = None
EXP_DIR = OUTPUT_DATA_DIR = None
TENSORBOARD_LOG_DIRNAME = None
EXP_NUM = TRIAL_NUM = None
MAX_LOOP_VAR = None
# Default sample count (not referenced by the cells visible in this notebook).
_DEFAULT_NUM_SAMPLES = 2048


In [27]:
# jupyter libraries
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import matplotlib as mpl

In [28]:
# K_c = ln(1 + sqrt(2)) / 2 (about 0.4407). The 2D cells divide 1/T by this
# value to label each data file with its coupling as a fraction of K_c.
INTERACTION_Kc = 0.5 * np.log(np.sqrt(2) + 1)

In [29]:
# Absolute roots used by the rest of the notebook: RES_DIR holds the input
# sample files, DATA_DIR receives the TensorBoard logs.
RES_DIR, DATA_DIR = (
    "/Users/jesselin/Dropbox/src/thesis/final/resources",
    "/Users/jesselin/Dropbox/src/thesis/final/data",
)

In [30]:
def process_data(data_filename: str, interaction_K: list, num_samples: int, batch_size: int = 10, num_workers: int = 0):
    """Load a spin-sequence data file and wrap a random subset of it in a DataLoader.

    Args:
        data_filename: path of the .npz file (1D simulations) handed to
            ``SpinSequenceDataset``.
        interaction_K: coupling value(s), forwarded unchanged to
            ``SpinSequenceDataset``.
        num_samples: size of the random subset the loader draws from.
        batch_size: mini-batch size of the returned loader.
        num_workers: number of DataLoader worker processes.

    Returns:
        A shuffling ``DataLoader`` over the selected subset.
    """
    dataset = SpinSequenceDataset(data_filename, interaction_K)
    # Subset selection and loader construction live in make_dataloader, so the
    # two entry points cannot drift apart.
    return make_dataloader(dataset, num_samples, batch_size=batch_size, num_workers=num_workers)

def preprocess_data(exp_dir: str):
    """Pick one ``lattice*.txt`` file from an experiment directory and read its coupling.

    Stopgap for the old 2D data: the matching files are sorted by name and the
    last one is used, so the choice is the same on every run and every machine.
    NOTE(review): the stated intent is "the longest iteration file"; name order
    only matches that if the iteration count sorts lexicographically -- confirm
    against the real file names.

    The temperature is read from the path itself: the path is split on "_" and
    the token following a bare "T" token is parsed as a float.

    Args:
        exp_dir: directory containing the ``lattice*.txt`` files.

    Returns:
        ``(data_sample_filepath, interaction_K)`` with
        ``interaction_K = 1 / temperature``.

    Raises:
        FileNotFoundError: if ``exp_dir`` holds no ``lattice*.txt`` file.
        ValueError: if the path has no "T" token, or the token after it is not
            a number.
    """
    search_term = os.path.join(exp_dir, "lattice*.txt")
    # glob returns matches in arbitrary order, so sort before taking the last one.
    raw_file_list = sorted(glob.glob(search_term))
    if not raw_file_list:
        raise FileNotFoundError(f"no files matching {search_term!r}")

    data_sample_filepath = raw_file_list[-1]
    tokens = data_sample_filepath.split("_")
    temperature_index = tokens.index("T") + 1
    interaction_K = 1 / float(tokens[temperature_index])

    return data_sample_filepath, interaction_K

In [31]:
def make_dataloader(dataset: Dataset, num_samples: int, batch_size: int = 10, num_workers: int = 0):
    """Wrap a random subset of ``dataset`` in a shuffling DataLoader.

    Args:
        dataset: any dataset that supports ``len()`` and indexing.
        num_samples: number of items to draw at random. If it exceeds
            ``len(dataset)`` the whole dataset is used.
        batch_size: mini-batch size of the returned loader.
        num_workers: number of DataLoader worker processes.

    Returns:
        A ``DataLoader`` (``shuffle=True``) over the selected subset.

    Raises:
        ValueError: if ``num_samples`` is not positive.
    """
    if num_samples <= 0:
        raise ValueError(f"num_samples must be positive, got {num_samples}")

    total = len(dataset)
    subset_size = min(num_samples, total)

    # Only the first split is used, so split into "subset" and "everything
    # else" instead of cutting the whole dataset into num_samples-sized chunks.
    lengths = [subset_size]
    if total > subset_size:
        lengths.append(total - subset_size)

    train_subset = random_split(dataset, lengths)[0]
    return DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=num_workers)


# 1D

## SR

# for the culture

In [32]:
# TensorBoard logs of the 1D "SR" runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "1D/SR")

In [33]:
# Coupling of this run. The same value is repeated in the data file name and
# in the get_entropy call below, so all three must be edited together.
K = 1.2
# SpinSequenceDataset is a project class (presumably from my_funcs -- it is not
# imported by name in this notebook).
dataset = SpinSequenceDataset("/Users/jesselin/Dropbox/src/thesis/final/resources/1D/SR/trainData_K=1.2.npz", K)
# Train on a random subset of 1000 samples.
dataloader = make_dataloader(dataset, num_samples=1000)
# Reference value; the training cell uses it for the early-stopping threshold.
entropy = get_entropy(dimension=1, interaction_K=[1.2, 0., 0.])

In [34]:
# One trial at hidden_size = 1. Every trial logs under the same
# "hidden_size=..." name, so TensorBoardLogger stores each as a new version.
hidden_size = 1
# The log directory does not depend on the trial, so build it once.
log_dir = os.path.join(data_dir, f"macbook_K={K}")
for t in range(1, 2):
    model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
    logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
    # Stop once train_loss reaches 99.5% of the reference entropy, or after 10
    # epoch-end checks without an improvement of at least 0.001.
    early_stop = EarlyStopping(monitor="train_loss", stopping_threshold=entropy*0.995, check_on_train_epoch_end=True, min_delta=0.001, patience=10)
    trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
    trainer.fit(model, dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)


Epoch 3: 100%|██████████| 100/100 [00:13<00:00,  7.22it/s, loss=0.291, v_num=9]


# Oops, mistake

In [8]:
# TensorBoard logs of the 1D "SR" runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "1D/SR")

In [9]:
# Coupling of this run. The same value is repeated in the data file name and
# in the get_entropy call below, so all three must be edited together.
K = 1
dataset = SpinSequenceDataset("/Users/jesselin/Dropbox/src/thesis/final/resources/1D/SR/trainData_K=1.npz", K)
# Train on a random subset of 1000 samples.
dataloader = make_dataloader(dataset, num_samples=1000)
# Reference value; the training cell uses it as the early-stopping threshold.
entropy = get_entropy(dimension=1, interaction_K=[1., 0., 0.])

In [11]:
# Nine independent trials at hidden_size = 1. Every trial logs under the same
# "hidden_size=..." name, so TensorBoardLogger stores each as a new version.
hidden_size = 1
# The log directory does not depend on the trial, so build it once.
log_dir = os.path.join(data_dir, f"macbook_K={K}")
for t in range(1, 10):
    # A fresh model per trial, so every run starts from a new initialisation.
    model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
    logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
    # Stop once train_loss reaches the reference entropy, or after 10
    # epoch-end checks without an improvement of at least 0.001.
    early_stop = EarlyStopping(monitor="train_loss", stopping_threshold=entropy, check_on_train_epoch_end=True, min_delta=0.001, patience=10)
    trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
    trainer.fit(model, dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 2: 100%|██████████| 100/100 [00:11<00:00,  8.39it/s, loss=0.365, v_num=1]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 3: 100%|██████████| 100/100 [00:11<00:00,  8.68it/s, loss=0.367, v_num=2]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 3: 100%|██████████| 100/100 [00:10<00:00,  9.10it/s, loss=0.367, v_num=3]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 1: 100%|██████████| 100/100 [00:10<00:00,  9.14it/s, loss=0.371, v_num=4]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 4: 100%|██████████| 100/100 [00:10<00:00,  9.50it/s, loss=0.367, v_num=5]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 3: 100%|██████████| 100/100 [00:10<00:00,  9.65it/s, loss=0.368, v_num=6]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 6: 100%|██████████| 100/100 [00:14<00:00,  7.11it/s, loss=0.364, v_num=7]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 4: 100%|██████████| 100/100 [00:12<00:00,  8.31it/s, loss=0.364, v_num=8]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 4: 100%|██████████| 100/100 [00:11<00:00,  8.67it/s, loss=0.367, v_num=9]


### Low temperature test

In [7]:
# TensorBoard logs of the 1D "SR" runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "1D/SR")

In [8]:
# NOTE(review): `data` is loaded and converted to a tensor here but is not
# referenced again in the cells visible in this notebook; the dataset below is
# given the same file name directly.
# The file name is relative, so it is resolved against the current working
# directory rather than RES_DIR.
data = np.load("1dNNMC_K=2.npz")["data"]
K = 2
data = torch.tensor(data)
dataset = SpinSequenceDataset("1dNNMC_K=2.npz", K)
# Train on a random subset of 1000 samples.
dataloader = make_dataloader(dataset, num_samples=1000)
# Reference value; the training cell uses it as the early-stopping threshold.
entropy = get_entropy(dimension=1, interaction_K=[2., 0., 0.])

nearest_neighbor_bool True
k2_bool False
k3_bool False


In [9]:
# Two independent trials at K = 2 with a single hidden unit.
hidden_size = 1
for trial in np.arange(2)+1:
    # Same directory and logger name on every pass; TensorBoardLogger stores
    # each trial as a new version.
    log_dir = os.path.join(data_dir, f"macbook_K={K}")
    # A fresh model per trial, so every run starts from a new initialisation.
    model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
    logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
    # Stop once train_loss reaches the reference entropy, or after 10
    # epoch-end checks without an improvement of at least 0.001.
    early_stop = EarlyStopping(monitor="train_loss", stopping_threshold=entropy, check_on_train_epoch_end=True, min_delta=0.001, patience=10)
    trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
    trainer.fit(model, dataloader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 100/100 [00:21<00:00,  4.61it/s, loss=0.0936, v_num=8]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 2     
2 | fc      | Linear        | 1     
3 | logprob | LogisticLayer | 0     
------------------------------------------
3         Trainable params
0         Non-trainable params
3         Total params
0.000     Total estimated model params size (MB)



Epoch 10: 100%|██████████| 100/100 [00:17<00:00,  5.76it/s, loss=0.0915, v_num=9]


## LR

In [10]:
def get_interaction_K(resource_filepath):
    """Read the list of couplings encoded in a resource file name.

    Everything after the first "K=" in the extension-less base name is taken,
    surrounding square brackets are stripped, and the space-separated numbers
    are converted to floats, e.g. "arr_sampledata_K=[1.  0.1].npz" -> [1.0, 0.1].

    Raises:
        ValueError: if the base name has no "K=" or a field is not a number.
    """
    stem = os.path.splitext(os.path.basename(resource_filepath))[0]
    start = stem.index("K=") + len("K=")
    fields = stem[start:].strip("[]").split(" ")

    couplings = []
    for field in fields:
        # Runs of several spaces leave empty fields behind; skip them.
        if field != "":
            couplings.append(float(field))
    return couplings

In [None]:
# Sweep hidden sizes 1..5 over every 1D "LR" sample file.
# The file list and the log root are resolved here, so the cell does not
# depend on variables that only a later cell defines and it runs on a fresh kernel.
res_dir = os.path.join(RES_DIR, "1D/LR")
raw_file_list = sorted(glob.glob(os.path.join(res_dir, "arr_sampledata*")))
data_dir = os.path.join(DATA_DIR, "1D/LR")

for f in raw_file_list:
    # The couplings are encoded in the file name.
    interaction_K = get_interaction_K(f)
    log_dir = os.path.join(data_dir, f"macbook_K={str(interaction_K)}")

    dataset = SpinSequenceDataset(f, interaction_K=interaction_K)
    # Train on a random subset of 100 samples per file.
    dataloader = make_dataloader(dataset, num_samples=100)
    # range() yields plain Python ints; np.arange yields numpy integers, which
    # strict int type checks (presumably inside the model's RNN layer) may
    # reject -- TODO confirm.
    for hidden_size in range(1, 6):
        model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
        logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
        # No early stopping here: each model trains for the full 100 epochs.
        trainer = pl.Trainer(logger=logger, max_epochs=100)
        trainer.fit(model, dataloader)


### Repeatability of 4-state

In [14]:
# Collect every 1D "LR" sample file. The list is not used in this cell; the
# dataset below is built from one explicitly named file.
res_dir = os.path.join(RES_DIR, "1D/LR")
search_term = "arr_sampledata*"
raw_file_list = glob.glob(os.path.join(res_dir, search_term))

# TensorBoard logs of the 1D "LR" runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "1D/LR")
data_filepath = "/Users/jesselin/Dropbox/src/thesis/final/resources/1D/LR/arr_sampledata_K=[1.  0.1].npz"
# The file name carries two couplings; K is given a third entry of 0.0.
K = [1., 0.1, 0.0]
dataset = SpinSequenceDataset(data_filepath, K)
# Train on a random subset of 1000 samples.
dataloader = make_dataloader(dataset, num_samples=1000)

In [None]:
# Ten independent trials with one hidden unit, all logged under the same
# directory and logger name.
hidden_size = 1
log_dir = os.path.join(data_dir, f"macbook_K={K}")
# Reference value used as the early-stopping threshold, converted to a plain float.
entropy = float(get_entropy(dimension=1, interaction_K=K))
for trials in np.arange(10):
    # A fresh model per trial, so every run starts from a new initialisation.
    model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
    logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
    # Stop once train_loss reaches the reference entropy, or after 10
    # epoch-end checks without an improvement of at least 0.001.
    early_stop = EarlyStopping(monitor="train_loss", check_on_train_epoch_end=True, stopping_threshold=entropy, min_delta=0.001, patience=10)
    trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
    trainer.fit(model, dataloader)

### Lack of 6-state for $k=3$

In [34]:
# Collect every 1D "LR" sample file. The list is not used in this cell; the
# dataset below is built from one explicitly named file.
res_dir = os.path.join(RES_DIR, "1D/LR")
search_term = "arr_sampledata*"
raw_file_list = glob.glob(os.path.join(res_dir, search_term))

# TensorBoard logs of the 1D "LR" runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "1D/LR")
data_filepath = "/Users/jesselin/Dropbox/src/thesis/final/resources/1D/LR/arr_sampledata_K=[1.  0.7 0.2].npz"
# Three couplings, matching the three values in the file name.
K = [1., 0.7, 0.2]
dataset = SpinSequenceDataset(data_filepath, K)
# Train on a random subset of 1000 samples.
dataloader = make_dataloader(dataset, num_samples=1000)

In [None]:
# Ten trials each for hidden_size = 1 and hidden_size = 2.
# Neither the log directory nor the reference entropy depends on the loop
# variables, so both are computed once up front.
log_dir = os.path.join(data_dir, f"macbook_K={K}")
entropy = float(get_entropy(dimension=1, interaction_K=K))
# range() yields plain Python ints; np.arange yields numpy integers, which
# strict int type checks (presumably inside the model's RNN layer) may
# reject -- TODO confirm.
for hidden_size in range(1, 3):
    for trials in range(10):
        # A fresh model per trial, so every run starts from a new initialisation.
        model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
        logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
        # Stop once train_loss reaches the reference entropy, or after 10
        # epoch-end checks without an improvement of at least 0.001.
        early_stop = EarlyStopping(monitor="train_loss", check_on_train_epoch_end=True, stopping_threshold=entropy, min_delta=0.001, patience=10)
        trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
        trainer.fit(model, dataloader)

# 2D

## Process data

In [7]:
# 21 kfrac values from 0.90 to 1.10 in steps of 0.01.
kfrac_list = 0.01 * np.arange(21) + 0.9

In [8]:
# Root of the 2D resources and the sub-folder holding the compiled tensors
# (the folder name contains a space).
res_dir_2d = os.path.join(RES_DIR, "2D")
res_dir_compiled_tensor = os.path.join(res_dir_2d, "compiled tensors")

In [9]:
# Every file in the compiled-tensor folder whose name starts with "compiled".
# glob returns the matches in arbitrary order.
search_term = "compiled*"
raw_file_list = glob.glob(os.path.join(res_dir_compiled_tensor, search_term))
# assemble dictionary
def get_exp_num(filepath):
    """Return the experiment number encoded in a compiled-tensor file name.

    The extension is dropped, the base name is split on "_", and the text after
    the last "=" of the fifth field is converted to int.

    Raises:
        IndexError: if the base name has fewer than five "_"-separated fields.
        ValueError: if the extracted text is not an integer.
    """
    # Parse the argument itself rather than whatever a module-level `f`
    # happens to hold.
    stem = os.path.splitext(os.path.basename(filepath))[0]
    exp_num = stem.split("_")[4].split("=")[-1]
    return int(exp_num)

def get_temperature(filepath) -> str:
    """Return the temperature field of a compiled-tensor file name, as a string.

    The extension is dropped, the base name is split on "_", and the text after
    the last "=" of the sixth field is returned unparsed.

    Raises:
        IndexError: if the base name has fewer than six "_"-separated fields.
    """
    # Parse the argument itself rather than whatever a module-level `f`
    # happens to hold.
    stem = os.path.splitext(os.path.basename(filepath))[0]
    temperature_str = stem.split("_")[5].split("=")[-1]
    return temperature_str

# Map each file to the string form of (1/T) / INTERACTION_Kc rounded to two
# decimals, e.g. "1.0".
# NOTE(review): two files whose ratio rounds to the same value share a key and
# the later one silently replaces the earlier one -- confirm this is intended.
file_dict = {}
for f in raw_file_list:
    temperature = get_temperature(f)
    kfrac_str = str(round(1/float(temperature)/INTERACTION_Kc,2))
    file_dict[kfrac_str] = f

In [10]:
# Select the file whose rounded (1/T) / INTERACTION_Kc ratio is 1.0.
# The lookup key is the literal "1.0", which equals str(kfrac).
kfrac = float(1)
dataset = SpinSequenceDataset(file_dict["1.0"], interaction_K=[1])

In [11]:
# Small loader over a random subset of 10 samples.
dataloader = make_dataloader(dataset, num_samples=10)

In [12]:
# Pull a single batch out of the loader for inspection.
test = next(iter(dataloader))

## Train RNN

In [7]:
# Files named "compiled_extra*" in the "compiled extra tensors" folder
# (the folder name contains spaces).
res_dir = os.path.join(RES_DIR, "2D/compiled extra tensors")
search_term = "compiled_extra*"
raw_file_list = glob.glob(os.path.join(res_dir, search_term))
# TensorBoard logs of the 2D runs are written below this directory.
data_dir = os.path.join(DATA_DIR, "2D")

In [8]:
# Replaces the list built from "compiled_extra*" with the broader "compiled*"
# pattern in the same folder. glob returns the matches in arbitrary order.
search_term = "compiled*"
raw_file_list = glob.glob(os.path.join(res_dir, search_term))
# assemble dictionary
def get_exp_num(filepath):
    """Return the experiment number encoded in a compiled-tensor file name.

    The extension is dropped, the base name is split on "_", and the text after
    the last "=" of the fifth field is converted to int.

    Raises:
        IndexError: if the base name has fewer than five "_"-separated fields.
        ValueError: if the extracted text is not an integer.
    """
    # Parse the argument itself rather than whatever a module-level `f`
    # happens to hold.
    stem = os.path.splitext(os.path.basename(filepath))[0]
    exp_num = stem.split("_")[4].split("=")[-1]
    return int(exp_num)

def get_temperature(f):
    """Return the text that follows the first "T=" in a path, extension removed.

    The whole path is searched, not only the file name, and the result is
    returned as a string.

    Raises:
        ValueError: if the path contains no "T=".
    """
    marker = "T="
    stem = os.path.splitext(f)[0]
    return stem[stem.index(marker) + len(marker):]

# Map each file to the string form of (1/T) / INTERACTION_Kc rounded to two
# decimals, e.g. "1.0".
# NOTE(review): two files whose ratio rounds to the same value share a key and
# the later one silently replaces the earlier one -- confirm this is intended.
file_dict = {}
for f in raw_file_list:
    temperature = get_temperature(f)
    kfrac_str = str(round(1/float(temperature)/INTERACTION_Kc,2))
    file_dict[kfrac_str] = f

In [9]:
# Select the file whose rounded (1/T) / INTERACTION_Kc ratio is 1.0.
# The lookup key is the literal "1.0", which equals str(kfrac).
kfrac = float(1)
dataset = SpinSequenceDataset(file_dict["1.0"], interaction_K=[1])

In [10]:
# Train on a random subset of 100 samples.
dataloader = make_dataloader(dataset, num_samples=100)

In [11]:
# Single-layer RNN with 10 hidden units on the kfrac = 1.0 data.
hidden_size = 10

log_dir = os.path.join(data_dir, f"macbook_kfrac={str(kfrac)}")
model = IsingRNN_simple(hidden_size=hidden_size, num_layers=1, nonlinearity="tanh", bias_on=False)
logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
# No stopping_threshold here: training ends after 10 epoch-end checks without
# an improvement of at least 0.001 in train_loss, or at max_epochs.
early_stop = EarlyStopping(monitor="train_loss", check_on_train_epoch_end=True, min_delta=0.001, patience=10)
trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
trainer.fit(model, dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 110   
2 | fc      | Linear        | 10    
3 | logprob | LogisticLayer | 0     
------------------------------------------
120       Trainable params
0         Non-trainable params
120       Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 22: 100%|██████████| 10/10 [00:06<00:00,  1.52it/s, loss=0.381, v_num=0]


In [13]:
# Same setup as the single-layer run, but with num_layers=10.
# The logger name only encodes hidden_size, so this run lands in the same
# log folder as the single-layer run, as a further version.
hidden_size = 10

log_dir = os.path.join(data_dir, f"macbook_kfrac={str(kfrac)}")
model = IsingRNN_simple(hidden_size=hidden_size, num_layers=10, nonlinearity="tanh", bias_on=False)
logger = TensorBoardLogger(save_dir=log_dir, name=f"hidden_size={hidden_size}")
# No stopping_threshold here: training ends after 10 epoch-end checks without
# an improvement of at least 0.001 in train_loss, or at max_epochs.
early_stop = EarlyStopping(monitor="train_loss", check_on_train_epoch_end=True, min_delta=0.001, patience=10)
trainer = pl.Trainer(logger=logger, max_epochs=100, callbacks=[early_stop])
trainer.fit(model, dataloader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type          | Params
------------------------------------------
0 | loss_fn | NLLLoss       | 0     
1 | rnn     | RNN           | 1.9 K 
2 | fc      | Linear        | 10    
3 | logprob | LogisticLayer | 0     
------------------------------------------
1.9 K     Trainable params
0         Non-trainable params
1.9 K     Total params
0.008     Total estimated model params size (MB)


Epoch 13:   0%|          | 0/10 [00:39<?, ?it/s, loss=0.379, v_num=1]_num=2] 
Epoch 23: 100%|██████████| 10/10 [00:11<00:00,  1.11s/it, loss=0.38, v_num=2] 
