# Setup and Settings

Necessary imports and settings:

In [7]:
import torch, click, os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import pandas as pd
import plotly.io as pio
import plotly.express as px
# Select the plotly renderer named "vscode" as the default used when figures are shown.
# NOTE(review): presumably this notebook is meant to be run inside VS Code; adjust the
# renderer name if running it elsewhere.
pio.renderers.default = "vscode"

We now select a word to analyze. Note that to visualize the data, you must have already created the processed data file by running:
```sh
python main.py --word [your_word]
```

Processed data will be loaded from `cache/[your_word].pt`. 

In [8]:
# Word whose cached data (cache/<word>.pt) is loaded and plotted in the Visualization section.
word = 'right'

# Visualization

Running the following code block will create an interactive 3D PCA plot of the data extracted for the chosen word.

In [9]:

# Load the cached (embeddings, metadata) pair for this word and output the sample size.
# map_location='cpu' lets a cache written from a GPU run be opened on a machine
# without CUDA.
# NOTE(review): torch.load unpickles the file, so only open caches you generated
# yourself. On PyTorch releases where torch.load defaults to weights_only=True,
# loading the pickled metadata may additionally require weights_only=False;
# confirm against the installed version.
cache_path = os.path.join(os.getcwd(), 'cache', f'{word}.pt')
if not os.path.isfile(cache_path):
    raise FileNotFoundError(
        f'No cached data found at {cache_path}; run `python main.py --word {word}` first.'
    )
with open(cache_path, 'rb') as FILE:
    tensor, metadata = torch.load(FILE, map_location='cpu')

num_samples = len(metadata)
print(f'Number of samples: {num_samples}')

# Every embedding row needs exactly one metadata row; otherwise the concat below
# would silently pad the shorter side with NaN.
if tensor.shape[0] != num_samples:
    raise ValueError(
        f'Cache mismatch: {tensor.shape[0]} embedding rows but {num_samples} metadata rows'
    )

# Perform PCA on the embedded vectors, reducing them to 3 components for plotting.
# detach() keeps the NumPy conversion working even if the cached tensor tracks gradients.
pca = PCA(n_components = 3)
transformed_data = pca.fit_transform(tensor.detach().cpu().numpy())

# Format the dataframe for display purposes: metadata columns side by side with the
# PCA coordinates. reset_index() gives the metadata a fresh 0..n-1 index so its rows
# line up with the newly created coordinate frame.
df = pd.concat(
    [metadata.reset_index(), pd.DataFrame(transformed_data, columns=['x', 'y', 'z'])],
    axis = 1,
)
df['size'] = 0.1  # same marker size for every point

# Display data as an interactive 3D scatter plot; hovering a point shows the
# context plus the title/author columns.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    hover_name = 'context',
    hover_data = ['title', 'author', 'authoryearofbirth'],
    size = 'size',
    opacity = 0.1,
    width = 800,
    height = 800
    )

fig.show()

df

Number of samples: 9580


Unnamed: 0,index,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type,context,x,y,z,size
0,1,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,['en'],604.0,"{'United States -- History -- Revolution, 1775...",Text,...votes but because it is RIGHT if a free soc...,-5.245555,-1.138576,3.466522,0.1
1,1,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,['en'],604.0,"{'United States -- History -- Revolution, 1775...",Text,...cost of modern weapons both RIGHTly alarmed...,-0.314641,-1.327787,-3.529686,0.1
2,1,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,['en'],604.0,"{'United States -- History -- Revolution, 1775...",Text,...and inventors the exclusive RIGHT to their ...,-4.032016,12.524795,-0.682459,0.1
3,3,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,John F. Kennedy's Inaugural Address,"Kennedy, John F. (John Fitzgerald)",1917.0,1963.0,['en'],28.0,{'Presidents -- United States -- Inaugural add...,Text,...votes but because it is RIGHT if a free soc...,-5.245572,-1.138573,3.466505,0.1
4,3,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,John F. Kennedy's Inaugural Address,"Kennedy, John F. (John Fitzgerald)",1917.0,1963.0,['en'],28.0,{'Presidents -- United States -- Inaugural add...,Text,...cost of modern weapons both RIGHTly alarmed...,-0.314642,-1.327799,-3.529733,0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9575,262,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Collected Poems of Rupert Brooke,"Brooke, Rupert",1887.0,1915.0,['en'],83.0,"{'English poetry', 'Poets, English -- 20th cen...",Text,...me to left and to RIGHT hunched figures and...,6.261597,-0.130847,-1.063463,0.1
9576,262,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Collected Poems of Rupert Brooke,"Brooke, Rupert",1887.0,1915.0,['en'],83.0,"{'English poetry', 'Poets, English -- 20th cen...",Text,...so light we were so RIGHT we were so fair f...,-4.703040,-3.315287,0.347010,0.1
9577,262,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Collected Poems of Rupert Brooke,"Brooke, Rupert",1887.0,1915.0,['en'],83.0,"{'English poetry', 'Poets, English -- 20th cen...",Text,...the sonnets which are RIGHTly given pride o...,-1.741810,-2.186450,0.913677,0.1
9578,262,/home/andrew/polysemia/SPGC-tokens-2018-07-18/...,The Collected Poems of Rupert Brooke,"Brooke, Rupert",1887.0,1915.0,['en'],83.0,"{'English poetry', 'Poets, English -- 20th cen...",Text,...with absolute conviction of the RIGHTness o...,-1.415005,1.465185,5.294535,0.1
