In [1]:
import gensim
from gensim.models import Word2Vec
import nltk

# Ensure you have downloaded the necessary NLTK data
# nltk.download('punkt')

# Sample sentences
sentences = [
    ['apple', 'fruit', 'healthy'],
    ['apple', 'company', 'technology'],
    ['banana', 'fruit', 'yellow'],
    ['computer', 'technology', 'device'],
    ['technology', 'innovation', 'progress']
]

# Train the model
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

# Get the vector for 'apple'
apple_vector = model.wv['apple']
print("Vector for 'apple':", apple_vector[:5])  # Print first 5 dimensions

# Find similar words
similar_words = model.wv.most_similar('technology', topn=3)
print("Words similar to 'technology':", similar_words)

Vector for 'apple': [ 9.4563962e-05  3.0773198e-03 -6.8126451e-03 -1.3754654e-03
  7.6685809e-03]
Words similar to 'technology': [('company', 0.21617142856121063), ('yellow', 0.09291722625494003), ('healthy', 0.06285078078508377)]


In [2]:
import numpy as np
import plotly.graph_objs as go
from sklearn.decomposition import PCA

# Select words and get their vectors
vocab = list(model.wv.key_to_index)
word_vectors = np.array([model.wv[word] for word in vocab])

# Reduce dimensions with PCA to 3D
pca = PCA(n_components=3)
Y_pca_3d = pca.fit_transform(word_vectors)

# Prepare data for Plotly
trace = go.Scatter3d(
    x=Y_pca_3d[:, 0],
    y=Y_pca_3d[:, 1],
    z=Y_pca_3d[:, 2],
    mode='markers+text',
    text=vocab,
    textposition='top center',
    marker=dict(
        size=5,
        color=Y_pca_3d[:, 2],
        colorscale='Viridis',
        opacity=0.8,
    )
)

layout = go.Layout(
    title='Interactive 3D PCA visualization',
    scene=dict(
        xaxis_title='Component 1',
        yaxis_title='Component 2',
        zaxis_title='Component 3',
    ),
    height=1000,
)

fig = go.Figure(data=[trace], layout=layout)

# Display the plot
fig.show()