In [1]:
import h5py
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Read the embedding vectors and the bill titles from the HDF5 file.
# Both datasets are copied fully into memory via [:] before the file closes.
with h5py.File('data/encoded_vectors.h5', 'r') as hf:
    embeddings = hf['vectors'][:]
    # Each stored entry is decoded as UTF-8 into a regular Python str.
    titles = [raw_title.decode('utf-8') for raw_title in hf['bill_info'][:]]


In [2]:
# Apply t-SNE to reduce dimensions to 2D for visualization.
# scikit-learn requires perplexity < n_samples; with the default of 30 the fit
# raises a ValueError on small inputs. Cap it at n_samples - 1 so small
# datasets still work. For more than 30 samples this equals the default.
n_samples = len(embeddings)
tsne_perplexity = float(min(30, max(n_samples - 1, 1)))

# random_state is fixed so the layout is reproducible across runs.
tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
embeddings_reduced = tsne.fit_transform(embeddings)

In [3]:
# Draw all points in a single scatter trace. Adding one trace per point gives
# every point its own legend entry and colour and makes the figure far heavier
# to build and render than one vectorised trace.
# (`go` is already imported in the first cell, so it is not re-imported here.)

# Only plot points that have both a title and reduced coordinates; this keeps
# the truncation behaviour of the earlier per-point bounds check.
n_points = min(len(titles), len(embeddings_reduced))

fig = go.Figure(
    go.Scatter(
        x=embeddings_reduced[:n_points, 0],
        y=embeddings_reduced[:n_points, 1],
        mode='markers',
        text=titles[:n_points],  # shown on hover; markers mode draws no labels
        marker=dict(size=5),
    )
)

fig.update_layout(title='2D Visualization of Embeddings', xaxis_title='Component 1', yaxis_title='Component 2',
                  height=800, width=800)
fig.show()
