In [1]:
%load_ext autoreload
%autoreload 2

In [8]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np
from mat6115.hidden import run_and_save_hidden, get_hidden
from mat6115.dataset import dataset_factory, TEXT, LABEL, SEED
from mat6115.model import RNN
from mat6115.train import custom_loss, acc
from mat6115.fixed_point import FixedPointFinder
from mat6115.analysis import load_model
from poutyne.framework import Model
import torch
from torch import nn
from pathlib import Path
import pickle
import json

# Seed every RNG this notebook draws from so that Restart & Run All gives the
# same figures.  NumPy is seeded too because later cells pick the plotted
# points with np.random.shuffle.
torch.manual_seed(SEED)
np.random.seed(SEED)
# Ask cuDNN for deterministic kernels.
torch.backends.cudnn.deterministic = True


In [3]:
# Build the train / validation / test iterators for the "imdb" dataset with
# the "glove.6B.100d" embedding.  Only test_iter is used by the cells visible
# in this notebook.
train_iter, valid_iter, test_iter = dataset_factory("imdb", embedding="glove.6B.100d")

In [12]:
# Directory the models are loaded from.
SAVE_PATH = Path('gru_1layer')

# First GPU when CUDA is available, CPU otherwise.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Two models from the same directory: one loaded without `restore` (labelled
# "Untrained" in the plots further down) and one loaded with restore=True.
vanilla_model = load_model(SAVE_PATH)
trained_model = load_model(SAVE_PATH, restore=True)

# Move both to the selected device; the last call's return value is what the
# cell displays.
vanilla_model.to(device)
trained_model.to(device)

<poutyne.framework.model.Model at 0x7fe691c2adf0>

In [13]:
import spacy
# English spaCy pipeline; predict_sentiment only uses its tokenizer.
# NOTE(review): the 'en' shortcut name was removed in spaCy 3 -- on a newer
# install this presumably has to be 'en_core_web_sm'; confirm the version in
# the project environment before changing it.
nlp = spacy.load('en')

def predict_sentiment(model, sentence):
    """Score one raw sentence with ``model``.

    The sentence is tokenized with the spaCy tokenizer, mapped to vocabulary
    indices through ``TEXT.vocab.stoi`` and sent through ``model.predict`` as
    a batch of one.

    Args:
        model: object exposing ``predict((tokens, lengths), batch_size=...)``
            (the poutyne models loaded in this notebook).
        sentence: raw text to score.

    Returns:
        A NumPy array holding the score of the sentence.  Assuming the model
        emits a single logit whose sigmoid is the probability of label
        index 1 (TODO confirm against the training loss), the returned value
        is the probability of the label that is *not* ``'neg'``.
    """
    tokens = [tok.text for tok in nlp.tokenizer(sentence)]
    token_ids = [TEXT.vocab.stoi[token] for token in tokens]

    # Batch of one: shape (1, n_tokens).
    batch = torch.LongTensor(token_ids).to(device).unsqueeze(0)
    # The length tensor is deliberately left on the CPU, as in the original
    # cell (presumably required by sequence packing inside the model).
    lengths = torch.LongTensor([len(token_ids)])

    prediction = model.predict((batch, lengths), batch_size=1)
    prob_index_one = 1 / (1 + np.exp(-prediction[0]))

    neg_index = LABEL.vocab.stoi['neg']
    if neg_index == 0:
        # 'neg' is label 0, so the sigmoid already is the non-negative-class
        # probability.  The former expression `0 - sigmoid` returned a
        # negative number here.
        return prob_index_one
    # 'neg' is label 1 in the run recorded in this notebook: complement it.
    return neg_index - prob_index_one

In [14]:
# Sanity check: score a clearly negative sentence with the restored model.
predict_sentiment(trained_model, "This film is terrible")

array([0.05614579], dtype=float32)

In [15]:
# Same sentence through the model loaded without `restore`, as a baseline.
predict_sentiment(vanilla_model, "This film is terrible")

array([0.47799277], dtype=float32)

In [8]:
# Collect hidden states, predictions and ground-truth labels from both models
# over the test iterator.  Later cells index the hidden-state arrays by layer
# on their first axis.
# NOTE(review): N=5000 is forwarded to get_hidden; presumably a cap on the
# number of examples -- confirm in mat6115.hidden.
vanilla_hidden_states, vanilla_preds, vanilla_ground_truth = get_hidden(
    vanilla_model, test_iter, N=5000
)
trained_hidden_states, trained_preds, trained_ground_truth = get_hidden(
    trained_model, test_iter, N=5000
)

In [9]:
# The first axis of the hidden-state arrays indexes the layer.
num_layers = vanilla_hidden_states.shape[0]

# Fit one full PCA per layer and per model.  The vanilla / trained fits are
# interleaved layer by layer; PCA.fit returns the fitted estimator itself.
pca_vanilla, pca_trained = [], []
for vanilla_layer, trained_layer in zip(vanilla_hidden_states, trained_hidden_states):
    pca_vanilla.append(PCA().fit(vanilla_layer))
    pca_trained.append(PCA().fit(trained_layer))

# Persist both lists of fitted estimators next to the model (trained first,
# then vanilla).
for pickle_name, fitted_pcas in (
    ('pca_trained.pkl', pca_trained),
    ('pca_vanilla.pkl', pca_vanilla),
):
    with open(SAVE_PATH / pickle_name, 'wb') as pickle_file:
        pickle.dump(fitted_pcas, pickle_file)

In [None]:
# One panel per layer.
fig, ax = plt.subplots(1, num_layers, figsize=(6 * num_layers, 5))
# With a single layer plt.subplots hands back a bare Axes; wrap it so the
# panels can be indexed and iterated uniformly.
if num_layers == 1:
    ax = [ax]

ax[0].set_ylabel('Explained Variance')
for layer, (panel, untrained_pca, trained_pca) in enumerate(zip(ax, pca_vanilla, pca_trained)):
    # The plotted quantity is the cumulative explained-variance ratio.
    # Untrained is drawn first so the legend entries line up.
    for fitted_pca, colour in ((untrained_pca, "#D9D9D9"), (trained_pca, "#282828")):
        cumulative = fitted_pca.explained_variance_ratio_.cumsum()
        panel.scatter(np.arange(len(cumulative)), cumulative, s=2, c=colour)
    panel.set_xlabel('PCA Components')
    panel.legend(['Untrained', 'Trained'])
    panel.set_title(f'Layer {layer + 1}')

In [None]:
# Two-component PCA per layer and per model.
pca_vanilla_2 = [PCA(n_components=2) for _ in range(num_layers)]
pca_trained_2 = [PCA(n_components=2) for _ in range(num_layers)]

# Pre-allocated buffers for the projections: (layer, point, component).
vanilla = np.empty((num_layers, vanilla_hidden_states.shape[1], 2))
trained = np.empty((num_layers, trained_hidden_states.shape[1], 2))

# Fit and project layer by layer, vanilla before trained within each layer.
for layer, (vanilla_pca, trained_pca) in enumerate(zip(pca_vanilla_2, pca_trained_2)):
    vanilla[layer] = vanilla_pca.fit_transform(vanilla_hidden_states[layer])
    trained[layer] = trained_pca.fit_transform(trained_hidden_states[layer])

# Persist the fitted estimators (trained first, then vanilla).
for pickle_name, fitted_pcas in (
    ('pca_trained_2.pkl', pca_trained_2),
    ('pca_vanilla_2.pkl', pca_vanilla_2),
):
    with open(SAVE_PATH / pickle_name, 'wb') as pickle_file:
        pickle.dump(fitted_pcas, pickle_file)

In [None]:
# 4x2 grid of 2-D PCA scatter plots.
#   columns: untrained model | trained model
#   rows:    ground truth pos, ground truth neg, predicted pos, predicted neg
#
# `vanilla_data` / `trained_data` were never defined in this notebook; take
# them from the per-layer projections `vanilla` / `trained` computed in the
# 2-component PCA cell.
# NOTE(review): the last layer is plotted; with the 1-layer model loaded here
# it is the only layer -- confirm the intended layer for deeper models.
vanilla_data = vanilla[-1]
trained_data = trained[-1]

fig, ax = plt.subplots(4, 2, figsize=(6, 6))

# (title suffix, label index, colour) for the two classes.
class_styles = (
    ('Pos', LABEL.vocab.stoi['pos'], 'g'),
    ('Neg', LABEL.vocab.stoi['neg'], 'r'),
)
# (title prefix, projected points, label sources in row order) per column.
columns = (
    ('Untrained', vanilla_data,
     (('ground truth', vanilla_ground_truth), ('Preds', vanilla_preds))),
    ('Trained', trained_data,
     (('ground truth', trained_ground_truth), ('Preds', trained_preds))),
)
last_row = 3

for col, (model_name, points, label_sources) in enumerate(columns):
    row = 0
    for source_name, labels in label_sources:
        for class_name, class_index, colour in class_styles:
            mask = labels == class_index
            panel = ax[row][col]
            # Very low alpha: the points overlap heavily, so density shows
            # up as colour saturation.
            panel.scatter(points[mask, 0], points[mask, 1], c=colour, alpha=0.009, s=2)
            panel.set_title(f'{model_name} {source_name} - {class_name}')
            # Axis labels only on the outer edge of the grid.
            if col == 0:
                panel.set_ylabel("PCA component #2")
            if row == last_row:
                panel.set_xlabel("PCA component #1")
            row += 1

plt.tight_layout()

In [None]:

# Three-component PCA of the trained model's hidden states.
# PCA expects a 2-D (samples, features) matrix, while the hidden states are
# stacked per layer on the first axis, so a single layer has to be selected.
# NOTE(review): the last layer is used; with the 1-layer model loaded here it
# is the only layer.
pca_trained_3 = PCA(n_components=3)

trained_data_3 = pca_trained_3.fit_transform(trained_hidden_states[-1])

In [None]:
from itertools import product

# 3x3 grid of pairwise scatter plots of the first three principal components
# of the trained model's hidden states.
fig, ax = plt.subplots(3, 3, figsize=(6, 6))

# Random subsample (at most 1000 points) to keep the figure light.
random_idx = np.arange(len(trained_data_3))
np.random.shuffle(random_idx)
random_idx = random_idx[:1000]
sampled_points = trained_data_3[random_idx]

# Colour by the labels collected together with the *trained* hidden states
# (the vanilla labels were used here before, unlike the 3-D plot cell).
sampled_labels = trained_ground_truth[random_idx]
pos_idx = sampled_labels == LABEL.vocab.stoi['pos']
neg_idx = sampled_labels == LABEL.vocab.stoi['neg']

for i, j in product(range(3), repeat=2):
    panel = ax[i][j]
    # Row i is labelled "Component #{i+1}" on the y axis and column j
    # "Component #{j+1}" on the x axis, so x takes component j and y takes
    # component i.
    for class_mask, colour in ((pos_idx, 'g'), (neg_idx, 'r')):
        panel.scatter(
            sampled_points[class_mask, j],
            sampled_points[class_mask, i],
            c=colour, alpha=0.2, s=2
        )
    if j == 0:
        panel.set_ylabel(f"Component #{i+1}")
    if i == 2:
        panel.set_xlabel(f"Component #{j+1}")

plt.tight_layout()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
%matplotlib notebook

In [None]:

# Three-component PCA of the trained model's hidden states, refitted for the
# interactive 3-D figure.
# PCA expects a 2-D (samples, features) matrix, while the hidden states are
# stacked per layer on the first axis, so a single layer has to be selected.
# NOTE(review): the last layer is used; with the 1-layer model loaded here it
# is the only layer.
pca_trained_3 = PCA(n_components=3)

trained_data_3 = pca_trained_3.fit_transform(trained_hidden_states[-1])

In [None]:
# 3-D scatter of a random sample of projected hidden states, coloured by
# ground-truth label (green = pos, red = neg).
fig = plt.figure()
ax = plt.axes(projection='3d')

# Draw at most 3000 point indices at random.
random_idx = np.arange(len(trained_data_3))
np.random.shuffle(random_idx)
random_idx = random_idx[:3000]

# Boolean masks over the sample, one per class.
pos_idx = trained_ground_truth[random_idx] == LABEL.vocab.stoi['pos']
neg_idx = trained_ground_truth[random_idx] == LABEL.vocab.stoi['neg']

# Positive points are drawn first, negative points second.
for class_mask, colour in ((pos_idx, 'g'), (neg_idx, 'r')):
    class_points = trained_data_3[random_idx][class_mask]
    ax.scatter3D(
        class_points[:, 0],
        class_points[:, 1],
        class_points[:, 2],
        c=colour, s=2, alpha=0.3
    )

ax.set_xlabel('Component #1')
ax.set_ylabel('Component #2')
ax.set_zlabel('Component #3')

In [None]:
# Debug check: shape of the third-component column for the sampled points
# selected by pos_idx, i.e. how many sampled points carry the 'pos' label.
trained_data_3[random_idx][pos_idx,2].shape

-------------------

# Fixed point

In [10]:
# Build the fixed-point finder on the module at index 0 of the trained
# network's `rnn` container (presumably the first recurrent layer).
# NOTE(review): lr and n_iter look like optimiser step size and iteration
# budget -- confirm in mat6115.fixed_point.
fixed_point_finder = FixedPointFinder(
    rnn_cell=trained_model.network.rnn[0], 
    lr=0.01, 
    n_iter=200, 
    device=device
)

In [11]:
# Move the collected hidden states onto the compute device.
trained_hidden_states = torch.tensor(trained_hidden_states).to(device)

# `kwargs` is not defined anywhere in this notebook, so read the input width
# from the recurrent layer handed to the fixed-point finder instead.
# NOTE(review): assumes rnn[0] is a torch recurrent module exposing
# `input_size` and that this equals the embedding dimension -- confirm in
# mat6115.model.
embedding_dim = trained_model.network.rnn[0].input_size

# One all-zero input vector per hidden state, allocated directly on the
# device: shape (n_states, 1, embedding_dim).
constant_input = torch.zeros(
    (trained_hidden_states.shape[1], 1, embedding_dim), device=device
)

In [12]:
# Show the shapes of the two tensors about to be passed to the finder.
print(trained_hidden_states.shape)
print(constant_input.shape)

torch.Size([1, 498968, 256])
torch.Size([498968, 1, 100])


In [None]:
# Run the finder on the first 1000 hidden states (sliced on axis 1, the axis
# after the layer axis) together with the first 1000 constant inputs.
# The result is unpacked into `point` and `is_fixed_point`.
point, is_fixed_point = fixed_point_finder.run(
    trained_hidden_states[:, :1000], 
    constant_input[:1000], 
    batch_size=256
)

Epoch 1/200 0.03s/step Step 4: loss: 4388.904785

In [None]:
# Feed a single padding token through the network, starting from a collected
# hidden state.
indexed = [TEXT.vocab.stoi[TEXT.pad_token]]
length = [1]
tensor = torch.LongTensor(indexed).to(device)
tensor = tensor.unsqueeze(0)
# The length tensor is left on the CPU, as in predict_sentiment.
length_tensor = torch.LongTensor(length)

for layer in trained_hidden_states:
    for state in layer:
        # Hidden states are real-valued: keep them floating point and on the
        # model's device.  The former torch.LongTensor(state) conversion would
        # either fail on a float tensor or truncate the values.
        # Two leading axes are added in front of the state vector.
        state = torch.as_tensor(state, dtype=torch.float, device=device)
        state = state.unsqueeze(0).unsqueeze(0)
        _, _, _, hidden_state = trained_model.network(tensor, length_tensor, state)
        # Only the first state of each layer is tried; this `break` leaves
        # the inner loop only.
        break

In [20]:
# Inspect the shape of `state` left over from the loop in the previous cell.
# NOTE(review): the recorded output, torch.Size([1, 256]), does not match two
# unsqueeze(0) calls on a 1-D state, so it presumably comes from an earlier
# version of that cell -- re-run to refresh it.
state.shape

torch.Size([1, 256])