# Knowledge Based Recommendation of Ingredients
## Notebook 4: Build and Visualize Interactive Knowledge Graph
### Project Breakdown
    1  Exploratory Data Analysis and Preprocessing
    2: Build Word Embeddings using Word2Vec, FastText
    3: Recommend Recipes based on ingredients
    4: Build and Visualize Interactive Knowledge Graph



In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import plotly.graph_objects as go
from plotly.offline import plot
import networkx as nx
import numpy as np

In [2]:
# load items, and their vector embeddings
items = np.loadtxt('data/items.txt', dtype=str)
vectors = np.load('data/w2v_vectors.npy')

In [4]:
# calculate cosine similarity amongst vectors
sims = cosine_similarity(vectors, vectors)

for i in range(len(vectors)):
    for j in range(len(vectors)):
        if i <= j :
            sims[i, j] = False

In [7]:
# we can set a thresold of similarity, and 
# consider all similarities that are above the threshold to be connected in graph

idxs = np.argwhere(sims > 0.55)

In [8]:
# Build a graph with edge between two items if they're similar
G = nx.Graph()

for index in idxs:
    G.add_edge(
        items[index[0]], items[index[1]],
        weight=sims[index[0], index[1]]
    )

In [15]:
weight_values = nx.get_edge_attributes(G, 'weight')
weight_values[('almond', 'pistachio')]

0.58811694

In [18]:
# spring_layout in short, keeps everthing that are related are close together
# and everything that are disimilar are pulled away

positions = nx.spring_layout(G)
nx.set_node_attributes(G, name='position', values=positions)


In [19]:
searches = []

In [20]:
edge_x = []
edge_y = []
weights = []
ave_x, ave_y = [], []
for edge in G.edges():
    x0, y0 = G.nodes[edge[0]]['position']
    x1, y1 = G.nodes[edge[1]]['position']
    edge_x.append(x0)
    edge_x.append(x1)
    edge_x.append(None)
    edge_y.append(y0)
    edge_y.append(y1)
    edge_y.append(None)
    ave_x.append(np.mean([x0, x1]))
    ave_y.append(np.mean([y0, y1]))
    weights.append(f'{edge[0]}, {edge[1]}: {weight_values[(edge[0], edge[1])]}')

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    opacity=0.7,
    line=dict(width=2, color='White'),
    hoverinfo='text',
    mode='lines')

edge_trace.text = weights


node_x = []
node_y = []
sizes = []
for node in G.nodes():
    x, y = G.nodes[node]['position']
    node_x.append(x)
    node_y.append(y)
    if node in searches:
        sizes.append(50)
    else:
        sizes.append(15)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        line=dict(color='White'),
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='Picnic',
        reversescale=False,
        color=[],
        opacity=0.9,
        size=sizes,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2
    )
)

invisible_similarity_trace = go.Scatter(
    x=ave_x, y=ave_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        color=[],
        opacity=0,
    )
)

invisible_similarity_trace.text=weights

In [21]:
node_adjacencies = []
node_text = []
for node, adjacencies in enumerate(G.adjacency()):
    node_adjacencies.append(len(adjacencies[1]))
    node_text.append(adjacencies[0])

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [25]:
fig = go.Figure(
    data=[edge_trace, node_trace, invisible_similarity_trace],
    layout=go.Layout(
        title='Knowledge Graph of Recipe Ingredients',
        template='plotly_white',
        titlefont_size=20,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20,l=5,r=5,t=40),
        annotations=[
            dict(
                text="Created By: <a href='https://github.com/gborn'> Glad Nayak</a>",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002 ) 
        ],
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    )
)

plot(fig)

'temp-plot.html'

## Deployment

In [47]:
%%writefile src/build_graph.py
from sklearn.metrics.pairwise import cosine_similarity
import plotly.graph_objects as go
import networkx as nx
import numpy as np

def build_similarity(vectors):
    """
    builds similarity matrix of vectors using cosine similarity
    @input: vector embeddings of items
    @returns similarity matrix

    """
    sims = cosine_similarity(vectors, vectors)
    for i in range(len(vectors)):
        for j in range(len(vectors)):
            if i <= j :
                sims[i, j] = False

    return sims


def build_graph(sims, items, searches=[]):
    """
    builds knowledge graph of ingredients
    @input sims: similarity matrix
    """

    idxs = np.argwhere(sims > 0.55)

    # Build a graph with edge between two items if they're similar
    G = nx.Graph()

    for index in idxs:
        G.add_edge(
            items[index[0]], items[index[1]],
            weight=sims[index[0], index[1]]
        )

    # spring_layout in short, keeps everthing that are related are close together
    # and everything that are disimilar are pulled away
    positions = nx.spring_layout(G)
    nx.set_node_attributes(G, name='position', values=positions)
    weight_values = nx.get_edge_attributes(G, 'weight')
    
    edge_x = []
    edge_y = []
    weights = []
    ave_x, ave_y = [], []
    for edge in G.edges():
        x0, y0 = G.nodes[edge[0]]['position']
        x1, y1 = G.nodes[edge[1]]['position']
        edge_x.append(x0)
        edge_x.append(x1)
        edge_x.append(None)
        edge_y.append(y0)
        edge_y.append(y1)
        edge_y.append(None)
        ave_x.append(np.mean([x0, x1]))
        ave_y.append(np.mean([y0, y1]))
        weights.append(f'{edge[0]}, {edge[1]}: {weight_values[(edge[0], edge[1])]}')

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        opacity=0.7,
        line=dict(width=2, color='White'),
        hoverinfo='text',
        mode='lines')

    edge_trace.text = weights


    node_x = []
    node_y = []
    sizes = []
    for node in G.nodes():
        x, y = G.nodes[node]['position']
        node_x.append(x)
        node_y.append(y)
        if node in searches:
            sizes.append(50)
        else:
            sizes.append(15)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            line=dict(color='White'),
            # colorscale options
            #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='Picnic',
            reversescale=False,
            color=[],
            opacity=0.9,
            size=sizes,
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right'
            ),
            line_width=2
        )
    )

    invisible_similarity_trace = go.Scatter(
        x=ave_x, y=ave_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            color=[],
            opacity=0,
        )
    )

    invisible_similarity_trace.text=weights

    node_adjacencies = []
    node_text = []
    for node, adjacencies in enumerate(G.adjacency()):
        node_adjacencies.append(len(adjacencies[1]))
        node_text.append(adjacencies[0])

    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    fig = go.Figure(
    data=[edge_trace, node_trace, invisible_similarity_trace],
    layout=go.Layout(
        title='Knowledge Graph of Recipe Ingredients',
        template='plotly_white',
        titlefont_size=20,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20,l=5,r=5,t=40),
        annotations=[
            dict(
                text="Created By: <a href='https://github.com/gborn'> Glad Nayak</a>",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002 ) 
        ],
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    ))

    return fig



Overwriting src/build_graph.py


In [61]:
%%writefile app.py
from src.build_graph import build_similarity, build_graph
import streamlit as st
import numpy as np
import os


PAGE_CONFIG = {"page_title":"App by Glad Nayak","page_icon":":smiley:","layout":"wide"}
st.set_page_config(**PAGE_CONFIG)

def main():
    """
    Render UI on web app, build and display knowledge graph
    """

    st.title('Knowledge Graph of Food Ingredients')
    # load items, and their vector embeddings
    items = np.loadtxt('data/items.txt', dtype=str)
    vectors = np.load('data/vectors.npy')

    # select ingredients
    selected_items = st.sidebar.multiselect('Select Ingredients', options=list(items))
    if selected_items:
        sims = build_similarity(vectors)
        fig = build_graph(sims, items, selected_items)   
        fig.update_layout(title_text="title", margin={"r": 0, "t": 0, "l": 0, "b": 0}, height=800)
        st.plotly_chart(fig, use_container_width=True, )

    else:
        sims = build_similarity(vectors)
        fig = build_graph(sims, items,  '')
        fig.update_layout(title_text="title", margin={"r": 0, "t": 0, "l": 0, "b": 0}, height=800)
        st.plotly_chart(fig, use_container_width=True, )

    
    # select vector weights
    selected_weights = st.sidebar.radio('Choose weights', options=['Word2Vec', 'FastText'])
    if selected_weights:
        vectors = np.load('data/ft_vectors.npy')

    
    st.sidebar.markdown("## How it works? :tomato:")
    st.sidebar.write(
        "Search ingredients and select weights to see similar ingredients, or explore existing clusters of food ingredients."
    )

if __name__ == '__main__':
	main()

Overwriting app.py


## Local Deployment for Testing

In [41]:
!streamlit run app.py --server.enableCORS=false &>/dev/null&

!lt --Bypass-Tunnel-Reminder --subdomain 'bornapp' --port 8501 &>/dev/null&

In [35]:
# kill app and clean up memory
st_id = !pgrep streamlit
!kill {st_id[0]}

lt_id = !pgrep lt
!kill {lt_id[0]}

/bin/bash: line 0: kill: {st_id[0]}: arguments must be process or job IDs


In [37]:
!pip install streamlit > /dev/null
!npm install -g localtunnel > /dev/null

[K[?25h

In [66]:
#  make requirements.txt file
#!pip install pipreqs
!pipreqs --force .

INFO: Successfully saved requirements file in ./requirements.txt


In [62]:
%%writefile setup.py

mkdir -p ~/.streamlit/

echo "\
[general]\n\
email = \"gladnayak25@gmail.com\"\n\
" > ~/.streamlit/credentials.toml

echo "\
[server]\n\
headless = true\n\
enableCORS=false\n\
port = $PORT\n\
" > ~/.streamlit/config.toml

Writing setup.py


In [63]:
%%writefile Procfile

web: sh setup.sh && streamlit run app.py

Writing Procfile
