In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
import pandas as pd
from torch.utils.data.dataloader import DataLoader

import math
from torch.utils.data import Dataset

from attentionVis import AttentionVis
from model import GPT, GPTConfig
from trainer import Trainer, TrainerConfig
from utils import set_seed

# import mplcyberpunk
%matplotlib inline
# plt.style.use('cyberpunk')

parent_path = os.path.dirname(os.path.dirname(os.getcwd())) + "/"
plt.style.use(['default'])

In [2]:
# set up logging
# Root logger configuration: INFO level, with timestamp, level and logger name
# prefixed to every message.
import logging
logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
)

In [3]:
# Ensemble size and epoch count.
# NOTE(review): neither name is read by the cells visible below; the training
# calls pass literal values (3 / 4 models, max_epochs=1) instead. Confirm.
N_MODELS = 10
max_epochs = 1

In [4]:
def set_plot_params():
    """Install the notebook's serif plot theme into matplotlib's global rcParams.

    NOTE(review): text, label and tick colours are 'white', the same colour as
    the figure/axes/savefig backgrounds set here. Confirm this is intended.
    """
    theme = {
        # fonts
        'font.family': 'serif',
        'font.serif': 'Ubuntu',
        'font.monospace': 'Ubuntu mono',
        'font.size': 16,
        # foreground colours
        'text.color': 'white',
        'axes.labelcolor': 'white',
        'xtick.color': 'white',
        'ytick.color': 'white',
        # axis labels
        'axes.labelsize': 12,
        'axes.labelweight': 'bold',
        # backgrounds
        'figure.facecolor': 'white',
        'axes.facecolor': 'white',
        'savefig.facecolor': 'white',
        # tick, legend and title sizes
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 14,
        'figure.titlesize': 16,
    }
    plt.rcParams.update(theme)

set_plot_params()

In [5]:
# Larger figure, label, tick and legend sizes, applied to the global rcParams.
# NOTE(review): several of these keys (legend.fontsize, axes.labelsize,
# xtick/ytick.labelsize) are set again by the set_plot_params() call in the
# next cell.
params = {'legend.fontsize': 20,
          'figure.figsize': (15, 5),
         'axes.labelsize': 25,
         'axes.titlesize':'xx-large',
         'xtick.labelsize':20,
         'ytick.labelsize':20,
         'legend.fancybox':True}
plt.rcParams.update(params)

In [6]:
def set_plot_params(facecolor='202020', textcolor='white'):
    """Install the notebook's serif plot theme into matplotlib's global rcParams.

    The colours are parameters so that a single definition can produce both the
    dark theme (the defaults) and a light one, e.g.
    ``set_plot_params(facecolor='white', textcolor='black')``.

    Parameters
    ----------
    facecolor : colour spec
        Background of figures, axes and saved figures. Default '202020'.
    textcolor : colour spec
        Colour of text, axis labels and tick marks/labels. Default 'white'.
    """
    plt.rcParams.update({
        # fonts
        'font.family': 'serif',
        'font.serif': 'Ubuntu',
        'font.monospace': 'Ubuntu mono',
        'font.size': 16,
        # foreground colours
        'text.color': textcolor,
        'axes.labelcolor': textcolor,
        'xtick.color': textcolor,
        'ytick.color': textcolor,
        # axis labels
        'axes.labelsize': 12,
        'axes.labelweight': 'bold',
        # backgrounds
        'figure.facecolor': facecolor,
        'axes.facecolor': facecolor,
        'savefig.facecolor': facecolor,
        # tick, legend and title sizes
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 14,
        'figure.titlesize': 16,
    })

set_plot_params()

In [7]:
# Context-window length (number of spikes per sample) handed to both datasets.
# NOTE(review): the training cells below record CUDA out-of-memory errors with
# this window and batch_size=64; a smaller value may be needed. Confirm.
block_size = 2000

# load and process data
path = parent_path + "code/data/SimulationForTest/SpikeTime_hub1Conn.csv"
df = pd.read_csv(path)

# Column 1 is used as the integer neuron id of each spike.
spikes = df.iloc[:, 1].astype(int)
# Row-to-row difference of column 0 (presumably spike time; confirm),
# standardised to zero mean and unit standard deviation.
dt = df.iloc[:, 0].diff().fillna(0)
dt = (dt - dt.mean()) / dt.std()

# Sorted vocabulary of neuron ids plus id <-> index lookup tables.
neurons = sorted(set(spikes))
stoi = {neuron: index for index, neuron in enumerate(neurons)}
itos = dict(enumerate(neurons))

# Sequential split: the first 3/5 of the rows train, the remainder tests.
train_len = round(len(spikes) * (3 / 5))
test_len = len(spikes) - train_len
train_df, train_dt = spikes[:train_len], dt[:train_len]
test_stop = train_len + test_len
test_df, test_dt = spikes[train_len:test_stop], dt[train_len:test_stop]

In [8]:
# Sanity check: the test intervals and test neuron ids should have equal length.
len(test_dt), len(test_df)

(7168, 7168)

In [9]:
# NOTE(review): import placed mid-notebook; consider moving it to the first cell.
from spikeTimeUtils import spikeTimeData

# Wrap each split (ids, window length, intervals, lookup tables) in the
# project's dataset class; both splits share the same stoi/itos vocabulary.
train_dataset = spikeTimeData(train_df, block_size, train_dt, stoi, itos)
test_dataset = spikeTimeData(test_df, block_size, test_dt, stoi, itos)

data has 10753 characters, 300 unique.
data has 7168 characters, 300 unique.


# Plotting Attention
## Training with position + temporal embeddings

In [10]:
def train_models(n, mconf, tconf):
    """Train `n` independently seeded GPT models on the notebook's datasets.

    Model `i` is seeded with `set_seed(i)`, built from `mconf`, and trained by
    a `Trainer` on the notebook-level `train_dataset` / `test_dataset` using
    `tconf`.

    Returns
    -------
    (models, train_losses, test_losses) : tuple of lists
        One entry per seed. Each model is stored in eval mode on the CPU; the
        loss entries are the trainer's `train_losses` / `test_losses`.
    """
    trained, losses_train, losses_test = [], [], []
    for seed in range(n):
        set_seed(seed)
        net = GPT(mconf)
        fitter = Trainer(net, train_dataset, test_dataset, tconf)
        fitter.train()

        # Switch to inference mode and move to the CPU before keeping it.
        trained.append(net.eval().to('cpu'))
        losses_train.append(fitter.train_losses)
        losses_test.append(fitter.test_losses)
    return trained, losses_train, losses_test

In [11]:
def att_models(models, dataset):
    """Aggregate, per model, the last-step attention received by each neuron.

    For every window `x` of `dataset`, the last row of the attention returned
    by `AttentionVis.getAttention(x, model)` is taken, and the score at each
    window position is added to the entry of the neuron index at that
    position (`x[:, 0]`).

    Fixes over the previous version:
    * a model's result is always appended (it used to be appended only inside
      the `break` branch, so it was dropped whenever iteration ended normally);
    * exactly `len(dataset)` windows are visited (the old guard
      `it > len(dataset)` let two extra windows through);
    * scores are summed in place instead of growing a matrix with `np.vstack`.

    Parameters
    ----------
    models : iterable of trained models accepted by `AttentionVis.getAttention`.
    dataset : sized iterable of `(x, y)` pairs; `x[:, 0]` is used as indices
        into an array of length `len(neurons)`.

    Returns
    -------
    list of np.ndarray
        One array of length `len(neurons)` per model.
    """
    n_neurons = len(neurons)
    n_windows = len(dataset)
    models_atts = []
    for model in models:
        attention_scores = np.zeros(n_neurons)
        pbar = tqdm(enumerate(dataset), total=n_windows)
        for it, (x, y) in pbar:
            # The dataset may keep yielding items past its declared length.
            if it >= n_windows:
                break
            # assumes getAttention returns a NumPy array whose last row holds
            # the final step's attention over window positions (TODO confirm)
            score = AttentionVis.getAttention(x, model)
            # take attentions from last step
            if score.size >= 1:
                score = score[-1]
            for idx, neuron in enumerate(x[:, 0].long().tolist()):
                attention_scores[neuron] += score[idx]
        models_atts.append(attention_scores)
    return models_atts


def att_inter(model, dataset):
    """Accumulate a neuron-by-neuron attention matrix over `dataset`.

    For every window and every step of the attention returned by
    `AttentionVis.getAttention`, the step's scores are added to row
    `x[step, 0]` at the columns given by the window's indices `x[:, 0]`.

    Fixes over the previous version:
    * `np.add.at` accumulates every occurrence of a neuron in the window;
      `row[indices] += values` keeps a single write per repeated index;
    * exactly `len(dataset)` windows are visited (the old guard
      `it > len(dataset)` let two extra windows through);
    * the per-window debug `print(score.shape)` is removed.

    Returns
    -------
    np.ndarray of shape `(len(neurons), len(neurons))`.
    """
    n_neurons = len(neurons)
    n_windows = len(dataset)
    attention_scores = np.zeros((n_neurons, n_neurons))
    pbar = tqdm(enumerate(dataset), total=n_windows)
    for it, (x, y) in pbar:
        # The dataset may keep yielding items past its declared length.
        if it >= n_windows:
            break
        ids = np.asarray(x[:, 0].long().tolist(), dtype=np.intp)
        # assumes one row of scores per step, each as long as the window
        # (TODO confirm against AttentionVis)
        score = AttentionVis.getAttention(x, model)
        for step in range(len(score)):
            np.add.at(attention_scores, (ids[step], ids), score[step])
    return attention_scores


def get_att_freqs(model, dataset):
    """Accumulate last-step attention and matching occurrence counts.

    For every window, the last row of the attention returned by
    `AttentionVis.getAttention` is added to row `x[-1, 0]` of the attention
    matrix at columns `x[:, 0]`, and the same cells of the frequency matrix
    are incremented, so `attention / frequency` is a per-occurrence mean.

    Fixes over the previous version:
    * `np.add.at` counts every occurrence of a neuron in the window for both
      matrices; `row[indices] += ...` keeps a single write per repeated index;
    * exactly `len(dataset)` windows are visited (the old guard
      `it > len(dataset)` let two extra windows through).

    Returns
    -------
    (attention_scores, inter_frequencies) : two np.ndarray of shape
        `(len(neurons), len(neurons))`.
    """
    n_neurons = len(neurons)
    n_windows = len(dataset)
    attention_scores = np.zeros((n_neurons, n_neurons))
    inter_frequencies = np.zeros((n_neurons, n_neurons))
    pbar = tqdm(enumerate(dataset), total=n_windows)
    for it, (x, y) in pbar:
        # The dataset may keep yielding items past its declared length.
        if it >= n_windows:
            break
        ids = np.asarray(x[:, 0].long().tolist(), dtype=np.intp)
        score = AttentionVis.getAttention(x, model)
        for step in [-1]:    #   range(len(score)):
            row = ids[step]
            np.add.at(attention_scores, (row, ids), score[step])
            np.add.at(inter_frequencies, (row, ids), 1)
    return attention_scores, inter_frequencies

In [12]:
def att_inter_freqs(dataset, block_size=100):
    """Count how often each neuron is preceded by each other neuron in a window.

    For every window and every position `step` below `block_size`, the cell
    `[x[step, 0], p]` is incremented once for each earlier position holding
    neuron index `p`.

    Changes over the previous version:
    * `block_size` is a parameter (default 100, the former hard-coded value)
      and is clamped to the window length, so short windows no longer raise
      IndexError;
    * `np.add.at` counts every earlier occurrence; `row[indices] += 1` counts
      a repeated index only once.

    Parameters
    ----------
    dataset : sized iterable of `(x, y)` pairs; `x[:, 0]` is used as indices
        into an array of length `len(neurons)`.
    block_size : int, optional
        Number of leading window positions to process.

    Returns
    -------
    np.ndarray of shape `(len(neurons), len(neurons))`.
    """
    n_neurons = len(neurons)
    n_windows = len(dataset)
    neuron_inter_freqs = np.zeros((n_neurons, n_neurons))
    pbar = tqdm(enumerate(dataset), total=n_windows)
    for it, (x, y) in pbar:
        # The dataset may keep yielding items past its declared length.
        if it >= n_windows:
            break
        ids = np.asarray(x[:, 0].long().tolist(), dtype=np.intp)
        for step in range(min(block_size, len(ids))):
            np.add.at(neuron_inter_freqs, (ids[step], ids[:step]), 1)
    return neuron_inter_freqs

In [13]:
# Trainer settings shared by both model ensembles trained below.
tconf = TrainerConfig(max_epochs=1, batch_size=64, learning_rate=1e-3, num_workers=0)

In [14]:
# Model configuration with both position and temporal embeddings enabled.
mconfPT = GPTConfig(train_dataset.population_size, train_dataset.block_size, max_epochs=1,
                  n_layer=2, n_head=2, n_embd=128, pos_emb=True, temp_emb=True)

# Train three models (seeds 0-2) with this configuration.
models_trainPT, models_train_lossesPT, models_test_lossesPT = train_models(3, mconfPT, tconf)

05/19/2022 00:23:26 - INFO - model -   number of parameters: 4.736000e+05
  0%|          | 0/137 [00:00<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 1.91 GiB (GPU 0; 10.73 GiB total capacity; 7.76 GiB already allocated; 103.69 MiB free; 8.19 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# First (x, y) pair of the training dataset, kept for interactive inspection.
x, y = next(iter(train_dataset))

In [None]:
# Shape of column 0 of the sample; the attention helpers use it as neuron indices.
x[:, 0].shape

In [None]:
# Attention returned for the first training window under the first trained
# model; the shape is displayed as a quick check.
score = AttentionVis.getAttention(next(iter(train_dataset))[0], models_trainPT[0])
score.shape

In [None]:
# Per-neuron aggregate attention for each position+temporal model on the training set.
models_trainPT_att = att_models(models_trainPT, train_dataset)

In [None]:
# Shape of the second model's aggregate attention array.
models_trainPT_att[1].shape 

In [None]:
# Enlarge tick, label and title fonts and paddings for the exported figure.
plt.rcParams.update({
    'xtick.labelsize': 25,
    'ytick.labelsize': 25,
    'axes.labelsize': 25,
    'figure.titlesize': 25,
    'axes.labelpad': 17,
    'font.family': 'serif',
    'axes.titlepad': 25,
})

# One scatter series per trained model: aggregate attention against neuron index.
plt.figure(figsize=(20, 10))
plt.ylabel('Attention (aggregate)', labelpad=15)
plt.xlabel('Neuron ID', labelpad=10)
for idx, att in enumerate(models_trainPT_att):
    # Series are labelled by model index; no legend is drawn here.
    plt.scatter(np.arange(len(neurons)), att, marker='x', alpha=0.5, label=idx)

plt.tight_layout()
plt.savefig('attentions-nl.png', dpi=400)

In [None]:
# NOTE: AVERAGE ATTENTIONS OVER THE MODEL ENSEMBLES. 

In [None]:
# Re-save only while a figure is still open. With the inline backend, figures
# are closed when their cell finishes, so an unguarded call here would lay out
# and save a new, empty figure over 'attentions-nl.png'.
if plt.get_fignums():
    plt.tight_layout()
    plt.savefig('attentions-nl.png', dpi=400)

In [None]:
# Spike count per neuron over the training split.
# Counts are indexed by vocabulary position (`stoi`), not by raw neuron id, so
# the array lines up with `np.arange(len(neurons))` even if the ids are not
# exactly 0..len(neurons)-1.
spike_freq = np.zeros(len(neurons))
for i in train_df:
    spike_freq[stoi[i]] += 1

plt.figure(figsize=(20,10))
plt.ylabel('Frequency', labelpad=10)
plt.xlabel('Neuron ID', labelpad=5)
plt.scatter(np.arange(len(neurons)), spike_freq, marker='x')

In [None]:
# Large-font settings for the following figures; saved images get a white background.
plt.rcParams.update({
    'xtick.labelsize': 25,
    'ytick.labelsize': 25,
    'axes.labelsize': 25,
    'figure.titlesize': 25,
    'axes.labelpad': 17,
    'font.family': 'serif',
    'savefig.facecolor': 'white',
    'axes.titlepad': 25,
})

In [None]:
# Aggregate attention divided by spike count, one scatter series per model.
plt.figure(figsize=(20, 10))
plt.ylabel('Total Attention / Frequency', labelpad=10)
plt.xlabel('Neuron ID', labelpad=5)
for idx, att in enumerate(models_trainPT_att):
    plt.scatter(
        np.arange(len(neurons)),
        att / spike_freq,
        marker='x',
        alpha=0.5,
        label=idx,
    )
    # Redrawn after every series so the legend always lists all models so far.
    plt.legend()

## Training With Temporal Embeddings Only

In [15]:
# Same settings as the position+temporal configuration, except that the
# position embedding is disabled (pos_emb=False).
mconfT = GPTConfig(train_dataset.population_size, train_dataset.block_size, max_epochs=1,
                  n_layer=2, n_head=2, n_embd=128, pos_emb=False, temp_emb=True)

In [16]:
# Train four temporal-embedding-only models (seeds 0-3).
modelsT_train, modelsT_train_losses, modelsT_test_losses = train_models(4, mconfT, tconf)

05/19/2022 00:23:29 - INFO - model -   number of parameters: 4.736000e+05
  0%|          | 0/137 [00:00<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 1.91 GiB (GPU 0; 10.73 GiB total capacity; 8.18 GiB already allocated; 97.69 MiB free; 8.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Sum the attention and frequency matrices over the temporal-only ensemble.
inter_atts = None
inter_freqs = None

for model in modelsT_train:
    inter_att, inter_freq = get_att_freqs(model, train_dataset)
    if inter_atts is None:
        inter_atts = inter_att
        inter_freqs = inter_freq
    else:
        inter_atts += inter_att
        inter_freqs += inter_freq

# Frequency-normalised attention matrix; cells with zero frequency give 0/0 = NaN.
plt.imshow(inter_atts / inter_freqs)
plt.colorbar()

# Draw the single-row profile in its own figure; without this the line would be
# plotted onto the heatmap's axes.
n = 11
plt.figure()
plt.plot(inter_atts[n] / inter_freqs[n])

In [None]:

# Frequency-normalised attention matrix for the ensemble.
av_attention = inter_atts / inter_freqs
# The title reports the window length the models were configured with, instead
# of a hard-coded number, and the parenthesis is now closed.
plt.title('Hub Attention Matrix (block_size={})'.format(train_dataset.block_size))
plt.imshow(av_attention)
plt.colorbar()


In [None]:
# import scipy.io

# scipy.io.savemat('inter_att_freqs.mat', mdict={'inter_att_freqs': inter_freqs})
# scipy.io.savemat('inter_atts.mat', mdict={'inter_atts': inter_atts})

In [None]:
# Frequency-normalised attention row for the neuron at index n.
n = 13
plt.plot(inter_atts[n] / inter_freqs[n])

In [None]:
# Per-neuron aggregate attention for each temporal-only model on the training set.
modelsT_att = att_models(modelsT_train, train_dataset)

## Overall Attention Per Neuron

In [None]:
# Attention per neuron summed across the models of the ensemble.
np.sum(modelsT_att, axis=0)

In [None]:
# NOTE(review): import placed mid-notebook; consider moving it to the first cell.
from utils import set_plot_white

# Presumably switches to a white-background theme (defined in utils; confirm).
set_plot_white()
plt.rcParams['axes.labelweight'] = 'bold'

def moving_average(a,n):
    """Return the means of all length-`n` sliding windows over `a`.

    The result has `len(a) - n + 1` entries (window `i` covers `a[i:i+n]`),
    and is empty when `a` is shorter than `n`.
    """
    window_count = len(a) - n + 1
    means = []
    for start in range(window_count):
        means.append(np.mean(a[start:start + n]))
    return np.array(means)

# Total attention per neuron: one scatter series per temporal-only model, plus
# a smoothed ensemble mean drawn as a dashed line.
plt.figure(figsize=(20,14))
plt.ylabel('Total Attention', labelpad=15)
plt.xlabel('Neuron ID', labelpad=10)
for idx, att in enumerate(modelsT_att):
    plt.scatter(np.arange(len(neurons)), att, marker='x',
                          alpha=0.9, label=idx, s=300)
    plt.legend()

# Ensemble mean across models, smoothed with an n_mean-wide moving average.
model_att_av = np.mean(modelsT_att, axis=0)
n_mean = 10
model_att_av_smooth = moving_average(model_att_av, n_mean)
# The x values are shifted by n_mean - 5 (= 5 here) to place each window mean
# near the middle of its window. NOTE(review): the offset is tied to n_mean by hand.
plt.plot(np.arange(len(model_att_av_smooth)) + n_mean - 5, model_att_av_smooth, lw=5, linestyle='--', color='black', alpha=0.7)
# plt.plot(np.arange(len(neurons), model_att_av, lw=10, linestyle='--', color='black')
plt.title('Attention Structure - Hub Network', fontsize=25)

# plt.savefig('attentions.png', dpi=400)

## Neuron ID Distribution

In [None]:
# Scatter of spike count per neuron.
# NOTE(review): relies on `spike_freq` computed in an earlier cell.
plt.figure(figsize=(20,10))
plt.ylabel('Frequency', labelpad=10)
plt.xlabel('Neuron ID', labelpad=5)
plt.scatter(np.arange(len(neurons)), spike_freq, marker='x')

## Frequency Averaged Attention Per Neuron

In [None]:
# Number of items the training dataset reports.
len(train_dataset)

In [None]:
from tqdm import tqdm

pbar = tqdm(enumerate(train_dataset), total=len(train_dataset))

# Occurrences of each neuron index across all training windows; used below as
# the denominator that turns summed attention into an average.
spike_freq = np.zeros(len(neurons))
for it, (x, y) in pbar:
    # The dataset may keep yielding items past its declared length; stop after
    # exactly len(train_dataset) windows.
    if it >= len(train_dataset):
        break
    # np.add.at counts every occurrence; `spike_freq[ids] += 1` would count a
    # neuron only once per window, however often it appears in it.
    ids = np.asarray(x[:, 0].long().flatten().tolist(), dtype=np.intp)
    np.add.at(spike_freq, ids, 1)


In [None]:
# Average attention per neuron (total attention / spike count): one scatter
# series per model, plus the smoothed ensemble mean as a dashed line.
plt.figure(figsize=(20,14))
plt.ylabel('Average Attention', labelpad=10)
plt.xlabel('Neuron ID', labelpad=5)
for idx, att in enumerate(modelsT_att):
    plt.scatter(np.arange(len(neurons)), np.divide(att,spike_freq), marker='x', alpha=0.8, label=idx, s=400)
    plt.legend(title='Model Seed')

model_att_av = np.mean(modelsT_att, axis=0) / spike_freq
n_mean = 5
model_att_av_smooth = moving_average(model_att_av, n_mean)
# Shift each window mean to the centre of its window; derived from n_mean
# instead of a hand-tuned constant (equals the former n_mean - 3 for n_mean = 5).
smooth_offset = (n_mean - 1) // 2
plt.plot(np.arange(len(model_att_av_smooth)) + smooth_offset, model_att_av_smooth, lw=5, linestyle='--', color='black', alpha=0.7)
# Title typo fixed ("Hubb" -> "Hub") to match the total-attention figure.
plt.title('Attention Structure - Hub Network', fontsize=25)
plt.savefig('attentions_normalized.png', dpi=400)

In [None]:
# Bar chart of the per-neuron counts held in `spike_freq`.
plt.title('Dataset Distribution')
plt.xlabel('Neuron ID', labelpad=5)
plt.ylabel('Frequency', labelpad=10)
plt.bar(np.arange(len(neurons)), spike_freq, alpha=0.5)

In [None]:
# Sum the all-step attention matrices over the temporal-only ensemble.
inter_atts = None

for model in modelsT_train:
    inter_att = att_inter(model, train_dataset)
    if inter_atts is not None:
        # Later models are added in place onto the running total.
        inter_atts += inter_att
        continue
    # The first model's matrix becomes the running total.
    inter_atts = inter_att
