In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import os
import csv
from scipy import stats
import pickle
import matplotlib
import matplotlib.pyplot as plt
from gensim.corpora import Dictionary
from biohasher import Biohasher

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
# Input artefacts, given relative to the notebook's working directory.
synapse_file = '../data/synapses/OWT_hid_400_W_11_LR_0.0002_14.npy'
tokenizer_file = '../data/tokenizer/gensim1_patched.dict'
stopword_file = '../data/tokenizer/exceptional_ids_terrier_stop.npy'
idx_order_file = '../data/indices_of_memories_Ben.npy'

# Forwarded to Biohasher below.
normalize_synapses = True

# Build the project-local helper from the synapse, tokenizer and stopword files.
biohasher = Biohasher(
    synapse_file,
    tokenizer_file,
    stopword_file=stopword_file,
    normalize_synapses=normalize_synapses,
)

In [None]:
# Load the array stored at idx_order_file (presumably an ordering of memory
# indices, judging by the file name -- TODO confirm).
memidxs = np.load(idx_order_file)

In [None]:
# Show the loaded indices as a plain Python list (this displays the whole array).
memidxs.tolist()

In [None]:
# Inspect the `synapses` attribute held by the Biohasher instance.
biohasher.synapses

In [None]:
# N_voc is the column at which later cells split each synapse row into two
# halves (columns [:N_voc] and [N_voc:]), so a full row has N = 2*N_voc entries.
N_voc=20000
N=2*N_voc
# Not used in the visible cells; presumably the number of hidden units
# ("hid_400" in the synapse file name) -- TODO confirm.
hid=400
# Small constant added to each row norm in normalize() to avoid dividing by zero.
prec=1.0e-32
# Exponent of the norm used in normalize().
p=2

# Reuse the path declared in the configuration cell instead of repeating the
# literal, so the matrix analysed here cannot drift from the one given to Biohasher.
synapses=np.load(synapse_file)
print(synapses.shape, N)

def normalize(syn):
    """Scale every row of a 2-D array by its p-norm.

    Uses the notebook-level constants ``p`` (norm exponent) and ``prec``
    (small offset added to each norm so that a zero row does not divide by zero).

    Parameters
    ----------
    syn : numpy array with exactly two dimensions (rows, columns).

    Returns
    -------
    A new array of the same shape; the input is not modified.
    """
    n_rows, _n_cols = syn.shape  # also rejects anything that is not 2-D
    row_norms = np.power(np.sum(syn**p, axis=1), 1/p).reshape(n_rows, 1)
    # A (rows, 1) column broadcasts across the columns, so no explicit tiling is needed.
    return syn / (row_norms + prec)

# Rescale each row of the loaded matrix by its p-norm (p and prec are the
# constants defined above). The result is bound to the same name, so
# re-running this cell applies normalize() to the already-normalized matrix.
synapses=normalize(synapses)

In [None]:
# Load the vocabulary and the exceptional-token ids from the paths declared in
# the configuration cell, so this cell cannot drift from what Biohasher was given.
# (Alternative location of the same dictionary: '/REMOTE/OpenWebText/models/gensim1_patched.dict')
VOC = Dictionary.load(tokenizer_file)
exceptional_tokens = np.load(stopword_file)

N_VOC = len(VOC)
print(N_VOC)

# Reverse lookup token -> id, built by querying the dictionary for every id in range.
tok2id = {VOC[i]: i for i in range(N_VOC)}

In [None]:
# Candidate query phrases; leave exactly one assignment uncommented.
#phrase = 'boat on the bank of the river'
#phrase = 'apple latest iphone'
#phrase = 'money in bank checking account'
#phrase = 'the company stock is training high'
#phrase = 'sweet crispy apple pie'
phrase = 'But that dialogue did not reflect the state of her marriage Kidman told the magazine'
#phrase = 'nation senior military leaders should quarantine after they were advised that Admiral Charles Ray with whom they had met with at a Pentagon meeting had tested positive for the virus'
#phrase = 'my research focuses on the computational properties of neural networks'
#phrase = 'trump administration campaign rally in oklahoma'
#phrase = 'ibm corporation to acquire opensource software startup'
#phrase = 'local government officials responded promptly to protests'
#phrase = 'influenza virus outbreak in public schools'

# Tokenize the phrase into an indicator column vector of length N:
# entry tok_id is set to 1 for every known token that is not exceptional.
v = np.zeros((N,1))
for w in phrase.split(' '):
    w = w.lower()
    tok_id = tok2id.get(w)
    if tok_id is None:
        # Words missing from the vocabulary are skipped without output.
        continue
    keep = tok_id not in exceptional_tokens
    print(w, tok_id, keep)
    if keep:
        v[tok_id,0] = 1.

print(np.nonzero(v))
# Scale to unit Euclidean length. If no token survived, the norm is zero and
# dividing would fill v with NaN, so v is left as all zeros instead.
nc = np.sqrt(np.sum(v*v))
if nc > 0:
    v = v/nc
else:
    print('Warning: no usable tokens found in phrase; v is left as all zeros')

In [None]:
def softmax(x: np.ndarray, beta=1.0):
    """Softmax over all entries of ``x`` with inverse temperature ``beta``.

    Computes ``exp(beta*x) / sum(exp(beta*x))``, with the sum taken over the
    whole array (no axis), so the returned array has the shape of ``x`` and
    sums to 1.

    The largest scaled value is subtracted before exponentiating. The shift
    cancels in the ratio, but it keeps ``np.exp`` from overflowing to ``inf``
    (and the result from becoming NaN) when ``beta*x`` is large.

    Parameters
    ----------
    x : array-like of numbers.
    beta : scalar multiplier applied to ``x`` before exponentiation.

    Returns
    -------
    numpy array with the same shape as ``x``.
    """
    scaled = beta * np.asarray(x)
    if scaled.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(scaled)
    weights = np.exp(scaled - np.max(scaled))
    return weights / np.sum(weights)

In [None]:
def _stable_exp_normalize(weights, inv_temp):
    """Return exp(inv_temp*weights) normalized to sum to 1, without overflow."""
    scaled = inv_temp * weights
    # Subtracting the maximum cancels in the ratio but keeps np.exp finite,
    # which matters for sharp settings such as beta_tar=800.
    shifted = np.exp(scaled - np.max(scaled))
    return shifted / np.sum(shifted)


def show_head_contribution(ind_head, beta=10.0, beta_tar=800.0, beta_con=10.0, force_mem=True):
    """Print the strongest target and context tokens of one head (synapse row).

    Reads the notebook-level ``synapses``, ``v``, ``N_voc``, ``VOC`` and
    ``exceptional_tokens``. Every head is scored by ``softmax(synapses @ v, beta)``.
    The chosen row is split at column ``N_voc``: columns ``[N_voc:]`` are reported
    under TARGET and columns ``[:N_voc]`` under CONTEXT, each turned into a
    distribution with its own inverse temperature.

    Parameters
    ----------
    ind_head : int
        Row index of the head when ``force_mem`` is True; otherwise a rank in
        the list of heads sorted by decreasing score (0 = highest score).
    beta : float
        Inverse temperature for the softmax over heads.
    beta_tar : float
        Inverse temperature for the target half of the row.
    beta_con : float
        Inverse temperature for the context half of the row.
    force_mem : bool
        Selects how ``ind_head`` is interpreted (see above).

    Returns
    -------
    None. All results are printed; percentages are 100 times the probabilities.
    """
    OV = np.dot(synapses, v)
    SM = softmax(OV, beta)
    mem_ordered = np.argsort(-SM[:, 0])
    head = ind_head if force_mem else mem_ordered[ind_head]
    print("Displaying results for head: ", head)
    print('Contribution of the chosen head', 100*SM[head,0])

    RHS_tar = synapses[head, N_voc:]
    RHS_con = synapses[head, :N_voc]
    N_show = 20  # number of top tokens listed per section

    RHS_tar_norm = _stable_exp_normalize(RHS_tar, beta_tar)
    print('\nTARGET')
    for ID in np.argsort(-RHS_tar_norm)[:N_show]:
        # The boolean column is True when the token id is NOT an exceptional token.
        print(ID, VOC[ID], int(ID) not in exceptional_tokens, 100*RHS_tar_norm[ID])

    RHS_con_norm = _stable_exp_normalize(RHS_con, beta_con)
    print('\nCONTEXT')
    for ID in np.argsort(-RHS_con_norm)[:N_show]:
        print(ID, VOC[ID], 100*RHS_con_norm[ID])

# Controls for show_head_contribution, keyed by the parameter each one drives.
_head_controls = dict(
    ind_head=widgets.BoundedIntText(
        value=7,
        min=0,
        max=biohasher.n_heads-1,
        step=1,
        description='Which head_ind:',
        disabled=False,
    ),
    # Slider alternative for the head selector:
    # ind_head=widgets.IntSlider(min=0, max=400, step=1, value=0),
    beta=widgets.FloatSlider(min=0.05, max=20.0, step=0.2, value=10.0),
    beta_tar=widgets.FloatSlider(min=10, max=1000.0, step=20, value=800.0),
    beta_con=widgets.FloatSlider(min=0.5, max=50, step=2, value=10),
)

# Render the widgets and re-run the function whenever one of them changes.
interact(show_head_contribution, **_head_controls)

In [None]:
# Non-interactive inspection of a single head for one fixed set of temperatures.
# Every exponential below is shifted by the maximum of its argument: the shift
# cancels in the normalized ratio but prevents np.exp from overflowing to inf
# (and the percentages from becoming NaN), e.g. with beta_out_tar = 800.
# Consequently Z and Z_out are the sums of the *shifted* exponentials.
beta = 10.0
OV = np.dot(synapses,v)
Z = np.sum(np.exp(beta*(OV - OV.max())))
SM = np.exp(beta*(OV - OV.max()))/Z
# Heads sorted by decreasing softmax score for the current phrase vector v.
mem_ordered = np.argsort(-SM[:,0])

# Rank (0 = highest score) of the head to display.
ind_head = 1
print('Contribution of the chosen head', 100*SM[mem_ordered[ind_head],0])

# Columns [N_voc:] of the row are reported as TARGET, columns [:N_voc] as CONTEXT.
RHS_tar= synapses[mem_ordered[ind_head],N_voc:]
# RHS_tar= synapses[ind_head,N_voc:]
RHS_con= synapses[mem_ordered[ind_head],:N_voc]

beta_out_tar = 800.0
Z_out = np.sum(np.exp(beta_out_tar*(RHS_tar - RHS_tar.max())))
RHS_tar_norm = np.exp(beta_out_tar*(RHS_tar - RHS_tar.max()))/Z_out

N_show = 20
print('TARGET')
for ID in np.argsort(-RHS_tar_norm)[:N_show]:
    # The boolean column is True when the token id is NOT an exceptional token.
    print(ID, VOC[ID], int(ID) not in exceptional_tokens, 100*RHS_tar_norm[ID])

beta_out_con = 10.0
Z_out = np.sum(np.exp(beta_out_con*(RHS_con - RHS_con.max())))
RHS_con_norm = np.exp(beta_out_con*(RHS_con - RHS_con.max()))/Z_out

N_show = 20
print('CONTEXT')
for ID in np.argsort(-RHS_con_norm)[:N_show]:
    print(ID, VOC[ID], int(ID) not in exceptional_tokens, 100*RHS_con_norm[ID])
    

In [None]:
# Call get_mem_concepts with argument 1 and display the result. The method is
# defined in the biohasher module, which is not visible here; presumably it
# describes memory index 1 -- TODO confirm.
biohasher.get_mem_concepts(1)

In [None]:
@interact(x=True, y=1.0)
def g(x, y):
    """Echo both widget values back as a tuple (minimal ``interact`` demo)."""
    pair = (x, y)
    return pair

In [None]:
# Number of items used by the chunking experiment in the next cell: two short of a 21 x 21 grid.
H = 21**2 - 2
# The integers 0 .. H-1 as a numpy array.
a = np.arange(0, H)

In [None]:
# Cut the integers 0 .. H-1 into consecutive rows of ceil(sqrt(H)) items;
# the final row is shorter when H is not a multiple of the row length.
row_len = int(np.ceil(np.sqrt(H)))
arr = list(range(H))
out, idx = [], 0
while idx < H:
    stop = idx + row_len
    out.append(arr[idx:stop])
    idx = stop
# Number of rows produced.
print(len(out))