In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_excel("curated food data.xlsx")
df['date'] = pd.to_datetime(df['date']) # set datetime

In [3]:
# combine comments url with curated food data

df_cook = pd.read_csv("covidcookery.csv")
df_cook_2 = df_cook[["image url", "comments url"]]
df_cook_2 = df_cook_2.drop_duplicates()
df = pd.merge(df, df_cook_2, left_on="image url", right_on="image url")

In [4]:
# flattern columns that contain keywords and append together

all_keyword = pd.DataFrame(columns=["date","title","image url", "comments url","up votes","keyword"])
for columns in df.loc[:,"A":"N"]: # loop through keywords column and append all keywords in one column
    n = "".join(columns)
    keyword = df[["date","title","image url","comments url","up votes",n]]
    keyword = keyword[keyword[n].notna()]
    keyword = keyword.rename(columns={n:"keyword"})
    all_keyword = all_keyword.append(keyword)

In [5]:
# create network nodes, set title image and keyword as separated nodes

nodes = all_keyword["title"].append(all_keyword["keyword"])
df_nodes = pd.DataFrame(nodes, columns=['name'])
df_nodes = df_nodes.assign(id=(df_nodes["name"]).astype('category').cat.codes) # assign unique id for each nodes

# based on nodes, set network edges

id_dict = dict(zip(df_nodes["name"],df_nodes["id"])) # map id to title and keyword
df_edges = all_keyword[["date","title","keyword","image url","comments url","up votes"]]
df_edges["id_title"] = df_edges["title"].map(id_dict)
df_edges["id_keyword"] = df_edges["keyword"].map(id_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [7]:
# filter data by date range

mask = (df['date'] >= "2020-11-1") & (df['date'] <= "2020-11-15")
df_nodes = df_nodes.loc[mask]
df_edges = df_edges.loc[mask]

In [8]:
# add nodes with unique id with date

df_nodes["rand"] = "NOV1101"
df_nodes['id'] = df_nodes["rand"] + df_nodes['id'].astype(str)
df_edges["rand"] = "NOV1101"
df_edges["id_title"] = df_edges["rand"].astype(str) + df_edges["id_title"].astype(str)
df_edges["id_keyword"] = df_edges["rand"].astype(str) + df_edges["id_keyword"].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [11]:
# build network graph

node = df_nodes["id"]
edge = [tuple(x) for x in df_edges[["id_title","id_keyword"]].to_numpy()]

import networkx as nx
G = nx.Graph()
G.add_nodes_from(node)
G.add_edges_from(edge)

In [12]:
# set node and edge attributes

img_dict = dict(zip(df_edges["id_title"],df_edges["image url"]))
comment_dict = dict(zip(df_edges["id_title"],df_edges["comments url"]))
title_dict = dict(zip(df_edges["id_title"],df_edges["title"]))
vote_dict = dict(zip(df_edges["id_title"],df_edges["up votes"]))
keyword_dict = dict(zip(df_edges["id_keyword"],df_edges["keyword"]))
nx.set_node_attributes(G, img_dict, 'img')
nx.set_node_attributes(G, comment_dict, 'comment')
nx.set_node_attributes(G, vote_dict, 'vote')
nx.set_node_attributes(G, title_dict, 'title')
nx.set_node_attributes(G, keyword_dict, 'keyword')

degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree')

df_edges["date"] = df_edges["date"].dt.strftime('%Y,%m,%d')
date_dict = dict(zip(df_edges["id_title"],df_edges["date"]))
nx.set_node_attributes(G, date_dict, 'time')

In [13]:
# write to graphnetwork file

nx.write_graphml(G, "food_network_11_1-11_15.graphml")