In [3]:
from gensim.models import Word2Vec
import gensim.downloader as api
import pprint
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
import numpy as np

def pca_plot_3d(models, words, seed=23):
    """Show side-by-side 3D PCA projections of word vectors, one subplot per model.

    For every model the vectors of ``words`` are looked up with
    ``model[word]``, reduced to three principal components and drawn as a
    labelled Plotly ``Scatter3d`` trace. The finished figure is displayed
    with ``fig.show()``; nothing is returned.

    Args:
        models: Sequence of embedding models that can be indexed by word
            (``model[word]`` must yield that word's vector).
        words: Sequence of words to plot. Each word is looked up in every
            model, so a word missing from any model is expected to make the
            lookup fail (``KeyError`` with gensim ``KeyedVectors`` --
            confirm for other model types).
        seed: Seed passed to PCA as ``random_state`` and used to draw the
            marker colours, so the same inputs give the same colouring.

    Raises:
        ValueError: If ``models`` is empty or fewer than three words are
            given (a 3-component PCA needs at least three samples).
    """
    n_models = len(models)
    n_words = len(words)
    if n_models == 0:
        raise ValueError("pca_plot_3d needs at least one model")
    if n_words < 3:
        raise ValueError(
            "pca_plot_3d needs at least three words for a 3-component PCA"
        )

    labels = list(words)

    # Draw the colour indices once from a generator seeded with `seed`:
    # the colouring is then reproducible, and a given word keeps the same
    # colour in every subplot, which makes the models easier to compare.
    rng = np.random.default_rng(seed)
    colors = rng.integers(0, n_words, size=n_words)

    # One 3D scene per model, laid out in a single row.
    fig = make_subplots(
        rows=1, cols=n_models,
        subplot_titles=[f'Model {i+1}' for i in range(n_models)],
        specs=[[{'type': 'scene'}] * n_models]
    )

    hover = (
        '<b>Word:</b> %{text}<br>'
        '<b>X:</b> %{x:.2f}<br>'
        '<b>Y:</b> %{y:.2f}<br>'
        '<b>Z:</b> %{z:.2f}<br>'
    )

    for model_ix, model in enumerate(models, 1):
        # Stack this model's vectors in the same order as `labels`.
        vectors = np.array([model[word] for word in labels])

        # Project onto the first three principal components.
        pca = PCA(n_components=3, random_state=seed)
        coords = pca.fit_transform(vectors)

        scatter = go.Scatter3d(
            x=coords[:, 0], y=coords[:, 1], z=coords[:, 2],
            mode='markers+text',
            text=labels,
            textposition='top center',
            marker=dict(
                size=8,
                color=colors,
                colorscale='Viridis',
                opacity=0.8
            ),
            hovertemplate=hover
        )
        fig.add_trace(scatter, row=1, col=model_ix)

    # update_scenes applies to every scene in the figure, so a single call
    # after the loop is enough.
    fig.update_scenes(
        aspectmode='cube',
        xaxis_title='PCA Component 1',
        yaxis_title='PCA Component 2',
        zaxis_title='PCA Component 3'
    )

    fig.update_layout(
        height=800,
        width=600 * n_models,
        title_text="3D PCA Visualization of Word Embeddings",
        showlegend=False
    )

    fig.show()

# Load models
# Both embeddings are fetched by name through gensim's downloader module
# (imported above as `api`). Judging by the names these are a 300-dimensional
# word2vec model and a 25-dimensional GloVe model -- TODO confirm.
# NOTE(review): api.load presumably downloads the data on first use and
# reuses a local cache afterwards; the first run may be slow -- confirm
# against the gensim downloader documentation.
v2w_model = api.load('word2vec-google-news-300')
glove_model = api.load('glove-twitter-25')

In [None]:
# Print similar words comparison
print("Words most similar to 'computer' with word2vec and glove respectively:")
# topn=3 requests exactly the three nearest neighbours instead of slicing
# the default-length result list.
pprint.pprint(v2w_model.most_similar("computer", topn=3))
pprint.pprint(glove_model.most_similar("computer", topn=3))

# Get common words and visualize
print("\n3D projection of some common words of both models:")
# Walk the word2vec slice in its own vocabulary order and keep the words that
# also occur in the GloVe slice. Slicing a set intersection directly would
# depend on set iteration order, which for strings varies between interpreter
# runs, so the plotted sample would not be reproducible.
glove_vocab = set(glove_model.index_to_key[100:10000])
sample_common_words = [
    word for word in v2w_model.index_to_key[100:10000] if word in glove_vocab
][:100]

# Create visualization
pca_plot_3d([v2w_model, glove_model], sample_common_words)