# Visualizing Vectors with TensorBoard


# Recall
* You need to do `pip install tensorboard`
* Documentation on using TensorBoard in PyTorch https://pytorch.org/docs/stable/tensorboard.html

In [2]:
# import a bunch of stuff
from typing import List, Tuple
import scipy
import numpy as np
import os.path
from pathlib import Path
import pickle

import torch  # PyTorch

from transformers import BertModel, BertTokenizer, BertConfig

# Import the model

In [3]:
# The available pre-trained checkpoints are listed at
# https://huggingface.co/transformers/pretrained_models.html
model_name = 'bert-base-uncased'

# Parameters that the pre-trained model was built with.
config = BertConfig.from_pretrained(model_name)

# The tokenizer has to be the one the model was trained with; otherwise words
# would be split into tokens differently from what the model expects.
tokenizer = BertTokenizer.from_pretrained(model_name)

# The pre-trained model itself. Be aware that it is very large.
model = BertModel.from_pretrained(model_name)

# Recall our nice function to make the vectors.

In [4]:
def make_vector(text: str) -> np.ndarray:
    """Encode ``text`` as a single vector using the pre-trained BERT model.

    The text is tokenized with ``get_tokens`` (defined outside this cell), the
    tokens are mapped to vocabulary ids, and the ids are run through ``model``
    as a batch of one. The model's pooled output is returned.

    Args:
        text: The text to encode.

    Returns:
        The pooled output as a NumPy array with the batch dimension squeezed
        away.
    """
    tokens = get_tokens(text, tokenizer, config)
    token_ids: List[int] = tokenizer.convert_tokens_to_ids(tokens)
    # The model expects a batch dimension, so turn the ids into a batch of one.
    token_ids_tensor = torch.tensor(token_ids).unsqueeze(0)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(token_ids_tensor)
    # Index the result instead of tuple-unpacking it. Older transformers
    # releases return a plain tuple, newer ones return a dict-like ModelOutput
    # whose iteration yields key names rather than tensors; integer indexing
    # works with both. Position 0 is last_hidden_state, position 1 is
    # pooler_output.
    vector = outputs[1]
    #vector = outputs[0].mean(dim=1)  # Can do this too
    return vector.numpy().squeeze()

# Load Tweets
The tweets can be found with the code at https://github.com/jmugan/modern_practical_nlp

In [5]:
# The file holds one tweet per line; strip() drops the trailing newline
# (and any other surrounding whitespace) from each of them.
with open('jmugan_tweets.txt', 'r') as tweet_file:
    all_tweets: List[str] = [line.strip() for line in tweet_file]

# Pull up the vectors from episode 1
* You should already have them saved from episode 1, but this will generate them if not.

In [6]:
def _load_or_generate_vecs(pickle_file: str, label: str, vectorize) -> List[np.ndarray]:
    """Return one vector per tweet, using ``pickle_file`` as an on-disk cache.

    If ``pickle_file`` exists, its contents are unpickled and returned.
    Otherwise ``vectorize`` is called on every tweet in ``all_tweets`` and the
    resulting list is pickled to ``pickle_file`` before being returned.

    Args:
        pickle_file: Path of the cache file.
        label: Word used in the progress messages (e.g. 'transformer').
        vectorize: Callable that takes a tweet string and returns its vector.

    Returns:
        The list of vectors, either loaded from disk or freshly generated.
    """
    if os.path.isfile(pickle_file):
        print(f"Loading saved {label} vecs.")
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that you generated yourself.
        with open(pickle_file, 'rb') as f:
            return pickle.load(f)

    print(f"Generating {label} vecs.")
    vecs: List[np.ndarray] = [vectorize(tweet) for tweet in all_tweets]
    # Make sure the cache directory exists so the (slow) generation above is
    # not thrown away by a FileNotFoundError when saving.
    os.makedirs(os.path.dirname(pickle_file), exist_ok=True)
    with open(pickle_file, 'wb') as f:
        pickle.dump(vecs, f)
    return vecs


home = str(Path.home())
data_dir = os.path.join(home, 'temp')
print("Data directory: ", data_dir)

transformer_vec_pickle_file = os.path.join(data_dir, 'transformer_vecs.pkl')
average_vec_pickle_file = os.path.join(data_dir, 'average_vecs.pkl')

# The vectorizers are wrapped in lambdas so their names are only looked up if
# the vectors actually have to be generated. get_average_vector_for_text is not
# defined in the visible part of this notebook, so referencing it eagerly would
# fail even when the cached vectors are available.
all_transformer_vecs: List[np.ndarray] = _load_or_generate_vecs(
    transformer_vec_pickle_file,
    'transformer',
    lambda text: make_vector(text),
)
all_average_vecs: List[np.ndarray] = _load_or_generate_vecs(
    average_vec_pickle_file,
    'average',
    lambda text: get_average_vector_for_text(text),
)

Data directory:  /Users/jmugan/temp
Loading saved transformer vecs.
Loading saved average vecs.


# Let's also pull up the vectors from the episode 2 model, which we trained to classify tweets as being about movies or not

In [7]:
# Vectors saved from the episode 2 model. The unpickled object is indexed
# with the keys 'vecs' and 'tweets' further down.
custom_vec_pickle_file = os.path.join(data_dir, 'custom_vecs.pkl')
with open(custom_vec_pickle_file, mode='rb') as pickle_handle:
    custom_vecs = pickle.load(pickle_handle)

# Format the data to show

In [11]:
# Show only the first few tweets; displaying the whole list floods the notebook.
all_tweets[:10]

["Cats don't like to wrestle.",
 'It would be nice if we could abandon all of this inane talk about celebrities and go back to gossiping about people that we actually know.',
 "Amazing how worthless I would be if you sent me back in time 10,000 years. I could describe amazing technology but couldn't build any of it.",
 "I can't believe we still have pennies. They aren't even worth picking up off the ground.",
 "Sometimes you wake and wish the adventure dream you were having didn't have to end. I want video games with that much excitement and realism",
 "Why isn't there an app that lets me share photos with all my social media sites? Facebook, G+, Twitter. Oh yeah, #NoProfitInFreedom",
 'This weekend, I was able to convince my youngest son, age 7, to branch beyond chicken nuggets to Orange Chicken. #CulinaryVictory',
 "I've found that being a fair-weather sports fan is a real time saver.",
 'My son, age 7, was playing "restaurant," and the first thing he did was set up security cameras.

In [23]:
from torch.utils.tensorboard import SummaryWriter

# TensorBoard reads the embeddings from the files this writer creates in data_dir.
summary = SummaryWriter(data_dir)

# Workaround for
# "AttributeError: module 'tensorflow_core._api.v2.io.gfile'
# has no attribute 'get_filesystem'"
# https://github.com/pytorch/pytorch/issues/30966
# The error appears to happen only when tensorflow is installed, so the patch
# is applied only if tensorflow can be imported. Importing it unconditionally
# would raise ImportError on machines that do not have it.
try:
    import tensorflow as tf
    import tensorboard as tb
except ImportError:
    is_annoying_error = False
else:
    is_annoying_error = True
    tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

# add_embedding is given one matrix per call here, so stack each list of
# vectors into a single array.
all_transformer_np = np.array(all_transformer_vecs)
all_average_np = np.array(all_average_vecs)

# Sanity check: the number of vectors should match the number of tweets.
print(len(all_transformer_vecs))
print(all_transformer_np.shape)
print(len(all_tweets))

# The tweet text is attached as metadata for each vector.
summary.add_embedding(all_transformer_np,
                      metadata=all_tweets,
                      tag='transformer_vecs')

summary.add_embedding(all_average_np,
                      metadata=all_tweets,
                      tag='average_vecs')

summary.add_embedding(custom_vecs['vecs'],
                      metadata=custom_vecs['tweets'],
                      tag='custom_vecs')

# Make sure everything is written to disk before TensorBoard is started.
summary.flush()



1219
(1219, 768)
1219


# Look at the vectors in TensorBoard

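Run this from the data directory printed above (the directory passed to `SummaryWriter`), or pass that directory as `--logdir`:

`tensorboard --logdir=./`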

then go to http://localhost:6006/