# Interactive Recommendation System with Word Embeddings using Word2Vec, Plotly, and NetworkX

## Project Breakdown
- Task 1: Introduction
- Task 2: Exploratory Data Analysis and Preprocessing
- Task 3: Word2Vec with Gensim
- Task 4: Exploring Results
- Task 5: Building and Visualizing Interactive Network Graph (you are here)

## Task 5: Building and Visualizing Interactive Network Graph
Let's see if we can visualize our results better!

In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import plotly.graph_objects as go
from plotly.offline import plot
import networkx as nx
import numpy as np

In [2]:
# Load the vocabulary (as strings) and the matching embedding matrix from disk.
# NOTE(review): later cells index `words` and the similarity matrix built from
# `vectors` with the same indices, so row i of `vectors` is presumably the
# embedding of `words[i]` — confirm against the notebook that wrote the files.
words = np.loadtxt(fname="./Data/words.txt", dtype=str)
vectors = np.load(file="./Data/vectors.npy")

In [3]:
# Pairwise cosine similarity between every pair of rows of `vectors`.
# With no second argument the matrix is compared against itself.
sim = cosine_similarity(vectors)

In [4]:
# Zero the diagonal and everything above it, in place, so that only entries
# with row index > column index keep their similarity value. This drops each
# vector's similarity with itself and keeps every unordered pair exactly once.
# One indexed assignment replaces the Python-level double loop over all
# (i, j) pairs.
sim[np.triu_indices_from(sim)] = 0

In [5]:
# (row, column) index pairs of every entry whose similarity is above 0.65.
similar_mask = sim > 0.65
indices = np.argwhere(similar_mask)
indices

array([[ 18,  16],
       [ 30,   9],
       [ 31,   9],
       ...,
       [907, 646],
       [907, 847],
       [907, 886]])

In [6]:
# Build an undirected graph with one edge per index pair in `indices`:
# the endpoints are the two words and the edge weight is their similarity.
G = nx.Graph()

for row, col in indices:
    G.add_edge(words[row], words[col], weight=sim[row, col])

In [7]:
# Dictionary keyed by edge, as a (word, word) tuple, holding that edge's
# "weight" attribute (the similarity stored when the edge was added).
weight_values = nx.get_edge_attributes(G, name="weight")
weight_values

{('egg', 'eggs'): 0.7777155,
 ('orange', 'lemon'): 0.83867216,
 ('orange', 'lime'): 0.74004054,
 ('orange', 'limes'): 0.6737236,
 ('orange', 'oranges'): 0.6727178,
 ('orange', 'grapefruit'): 0.72318953,
 ('orange', 'tangerine'): 0.88223654,
 ('orange', 'clementine'): 0.7992686,
 ('lemon', 'lime'): 0.8338885,
 ('lemon', 'lemons'): 0.7375067,
 ('lemon', 'limes'): 0.72252166,
 ('lemon', 'tangerine'): 0.78065497,
 ('lemon', 'clementine'): 0.6861555,
 ('lime', 'limes'): 0.85942197,
 ('lime', 'tangerine'): 0.6746109,
 ('cilantro', 'parsley'): 0.65485466,
 ('cilantro', 'mint'): 0.6793539,
 ('parsley', 'chives'): 0.74516344,
 ('parsley', 'tarragon'): 0.7549317,
 ('parsley', 'herbs'): 0.67342025,
 ('parsley', 'marjoram'): 0.65538746,
 ('tomato', 'tomatoes'): 0.6868246,
 ('clove', 'cloves'): 0.9204472,
 ('stock', 'broth'): 0.94694597,
 ('stock', 'consomme'): 0.67639786,
 ('broth', 'consomme'): 0.6657147,
 ('leaf', 'leaves'): 0.8256098,
 ('pork', 'beef'): 0.6969555,
 ('pork', 'lamb'): 0.7786149,


In [8]:
# Compute plot coordinates for every node with the force-directed spring layout.
# The layout is initialised randomly, so a fixed seed is passed to make the
# resulting figure identical every time the notebook is re-run.
positions = nx.spring_layout(G, seed=42)
positions

{'egg': array([-0.05845469, -0.90990758]),
 'eggs': array([-0.07255968, -0.90629864]),
 'orange': array([0.4776772 , 0.56100416]),
 'lemon': array([0.47008324, 0.56519783]),
 'lime': array([0.48488587, 0.58241749]),
 'cilantro': array([0.66723812, 0.34402752]),
 'parsley': array([0.66461629, 0.37915415]),
 'tomato': array([-0.53419346,  0.75416052]),
 'tomatoes': array([-0.55010027,  0.77174777]),
 'clove': array([-0.14234589, -0.83984351]),
 'cloves': array([-0.14739348, -0.8752625 ]),
 'stock': array([-0.22193474, -0.9155255 ]),
 'broth': array([-0.23802704, -0.90573269]),
 'leaf': array([0.27920666, 0.96059728]),
 'leaves': array([0.27300471, 0.9383657 ]),
 'pork': array([-0.06528655,  0.35092738]),
 'beef': array([-0.05609591,  0.38933569]),
 'oregano': array([0.71903777, 0.40657184]),
 'thyme': array([0.69979239, 0.39091644]),
 'chile': array([-0.26265007, -0.01307354]),
 'chili': array([-0.29585317, -0.02725955]),
 'canola': array([ 0.03353336, -0.92099625]),
 'vegetable': array(

In [9]:
# Store each node's layout coordinates on the graph under the "position" key
# so the plotting cells can read them back from G.nodes[...].
nx.set_node_attributes(G, positions, "position")

In [14]:
# Words to highlight: nodes with these names are drawn with a larger marker.
searches = [
    "potato",
    "gouda",
    "crab",
]

In [15]:
# Build the edge trace. All edges are drawn as a single line trace; a None
# entry after each pair of endpoints breaks the line between separate edges.
edge_x = []
edge_y = []
edge_text = []  # one hover label per plotted point, aligned with edge_x / edge_y
weights = []    # one hover label per edge, aligned with ave_x / ave_y
ave_x, ave_y = [], []
for source, target in G.edges():
    x0, y0 = G.nodes[source]['position']
    x1, y1 = G.nodes[target]['position']
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
    # Midpoint of the edge, used as the anchor of a separate hover marker.
    ave_x.append(np.mean([x0, x1]))
    ave_y.append(np.mean([y0, y1]))
    label = f'{source}, {target}: {weight_values[(source, target)]}'
    weights.append(label)
    # Plotly maps `text` items to points in order, and every edge contributes
    # three points (two endpoints plus the None gap). Repeating the label for
    # all three keeps each edge's hover text on that edge; assigning the
    # per-edge `weights` list here would shift labels onto the wrong edges.
    edge_text.extend([label, label, label])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    opacity=0.7,
    line=dict(width=2, color='White'),
    hoverinfo='text',
    mode='lines')

edge_trace.text = edge_text


# Build the node trace: one marker per graph node at its layout position.
node_x = []
node_y = []
sizes = []
for node in G.nodes():
    x, y = G.nodes[node]['position']
    node_x.append(x)
    node_y.append(y)
    # Searched-for words get a much larger marker so they stand out.
    sizes.append(50 if node in searches else 15)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='Picnic',
        reversescale=False,
        color=[],  # filled in later, once per-node connection counts are known
        opacity=0.9,
        size=sizes,
        colorbar=dict(
            thickness=15,
            # `titleside` is a deprecated attribute that newer plotly releases
            # reject; the nested title object is the supported spelling.
            title=dict(text='Node Connections', side='right'),
            xanchor='left',
        ),
        # Marker outline: color and width kept together in one place.
        line=dict(color='White', width=2),
    )
)

# Fully transparent markers placed at each edge's midpoint. They draw nothing
# but carry the per-edge hover label, so hovering the middle of an edge shows
# the two words and their similarity.
invisible_similarity_trace = go.Scatter(
    x=ave_x, y=ave_y,
    text=weights,
    mode='markers',
    hoverinfo='text',
    marker=dict(color=[], opacity=0),
)

In [16]:
# For every node collect its name (hover text) and its number of neighbors
# (marker color), in the graph's node iteration order.
adjacency_pairs = list(G.adjacency())
node_text = [name for name, _ in adjacency_pairs]
node_adjacencies = [len(neighbors) for _, neighbors in adjacency_pairs]

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [17]:
# Assemble the figure from the three traces (edges, nodes, invisible edge
# midpoints) and write it to an HTML file with plotly's offline plot().
fig = go.Figure(
    data=[edge_trace, node_trace, invisible_similarity_trace],
    layout=go.Layout(
        # `titlefont_size` is a deprecated attribute that newer plotly releases
        # reject; the nested title object carries both the text and the font.
        title=dict(
            text='Network Graph of Word Embeddings',
            font=dict(size=20),
        ),
        template='plotly_dark',
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        annotations=[
            dict(
                text="Adapted from: <a href='https://plotly.com/ipython-notebooks/network-graphs/'> https://plotly.com/ipython-notebooks/network-graphs/</a>",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002)
        ],
        # Hide grid lines, zero lines and tick labels on both axes.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    )
)

plot(fig)

'temp-plot.html'