# Food Graph  

In [None]:
import pandas as pd
import uuid
import numpy as np

In [None]:
# Path to the Zomato Bangalore restaurants CSV that the cells below load with
# pd.read_csv. It is left empty here and must be filled in before running.
# Dataset: https://www.kaggle.com/himanshupoddar/zomato-bangalore-restaurants
ZOMATO_RESTAURANTS_CSV = ""

### Restaurants Nodes

In [None]:
# Restaurant nodes: load the raw export, replace missing values with empty
# strings and keep a single row per restaurant name.
df = pd.read_csv(ZOMATO_RESTAURANTS_CSV).fillna("").drop_duplicates(subset=['name'])

# One random UUID per remaining row; the column name carries the ID-space
# suffix used by the CSV headers throughout this notebook.
df['uuid:ID(Restaurant)'] = [str(uuid.uuid4()) for _ in df.index]

# These columns are not exported with the restaurant rows.
df = df.drop(columns=['rate', 'approx_cost(for two people)', 'phone', 'votes'])
df.to_csv("./restaurant.csv")

### Location Nodes

In [None]:
# Location nodes: one row per distinct value of the restaurants' `location`
# column, each tagged with a freshly generated UUID.
locations = pd.DataFrame({'name': df['location'].unique()})
locations['uuid:ID(Location)'] = [str(uuid.uuid4()) for _ in locations.index]

# Index by name so a location name can be resolved to its UUID with .loc.
locations = locations.set_index('name')
locations.to_csv("./locations.csv")

### Restaurant-Location Relation (In Area)

In [None]:
# IN_AREA edges: one row per restaurant, pairing its UUID with the UUID of the
# location it is listed under.
#
# Both columns are passed as plain arrays on purpose. `df` keeps its original
# (non-contiguous) row labels after drop_duplicates, whereas this frame gets a
# fresh 0..n-1 index. Assigning a Series taken from `df` would align on those
# labels and attach location IDs to the wrong rows (or leave NaN) instead of
# pairing them positionally.
restaurant_location = pd.DataFrame({
    ':START_ID(Restaurant)': df['uuid:ID(Restaurant)'].to_numpy(),
    # `locations` is indexed by location name, so mapping through its UUID
    # column translates each name into the matching node ID.
    ':END_ID(Location)': df['location'].map(locations['uuid:ID(Location)']).to_numpy(),
})
restaurant_location.to_csv("./restaurant_location.csv")

### Type Nodes

In [None]:
# Type nodes: turn the comma-separated `rest_type` text of every restaurant
# into a list of whitespace-trimmed labels (stored back on df for later cells).
df['rest_type'] = df['rest_type'].apply(
    lambda raw: [label.strip() for label in raw.split(',')]
)
type_restaurant = df['rest_type'].tolist()

# Collapse all per-restaurant lists into the distinct set of labels.
unique_types = list({label.strip() for labels in type_restaurant for label in labels})

# One UUID per distinct label; indexed by name for later name -> UUID lookups.
types = pd.DataFrame({'name': unique_types})
types['uuid:ID(Type)'] = [str(uuid.uuid4()) for _ in types.index]
types = types.set_index('name')
types.to_csv("./type.csv")

### Restaurant-Type Relation (Fall Under)

In [None]:
# FALL_UNDER edges: one row per (restaurant, type label) pair.
# Each restaurant UUID is repeated once per label in its `rest_type` list, and
# the lists themselves are flattened in the same order, so the two columns
# line up position by position.
labels_per_restaurant = df['rest_type'].str.len()
type_restaurant = pd.DataFrame({
    ':START_ID(Restaurant)': np.repeat(df['uuid:ID(Restaurant)'].values, labels_per_restaurant),
    ':END_ID(Type)': np.concatenate(df['rest_type'].values),
})

# Swap every type label for the UUID stored against it in `types`.
type_restaurant[':END_ID(Type)'] = type_restaurant[':END_ID(Type)'].apply(
    lambda label: types.loc[label, "uuid:ID(Type)"]
)
type_restaurant.to_csv("./type_restaurant.csv")

### Cuisine Nodes

In [None]:
# Cuisine nodes: split the comma-separated `cuisines` text of every restaurant
# into a list of clean names (stored back on df for the relationship cell).
#
# Each token is whitespace-trimmed, matching how the rest_type and menu_item
# cells treat their tokens. Without the trim, "A, B" yields " B" (leading
# space), which would become a cuisine node distinct from "B".
df['cuisines'] = df['cuisines'].apply(
    lambda raw: [
        token.strip().strip("[]").replace("'", "").strip()
        for token in raw.split(',')
    ]
)

# Distinct names across all restaurants; sorted so the row order of the
# exported file does not depend on set iteration order.
unique_cuisines = sorted({name for names in df['cuisines'] for name in names})

# One UUID per cuisine; indexed by name for later name -> UUID lookups.
cuisines = pd.DataFrame({'name': unique_cuisines})
cuisines['uuid:ID(Cuisine)'] = [str(uuid.uuid4()) for _ in cuisines.index]
cuisines = cuisines.set_index('name')
cuisines.to_csv("./cuisine.csv")

### Restaurant-Cuisine Relation (Serves Cuisine)

In [None]:
# SERVES_CUISINE edges: one row per (restaurant, cuisine) pair, with the
# cuisine name resolved to its UUID through the name-indexed `cuisines` frame.
cuisine_restaurant = pd.DataFrame(
    [
        [restaurant_id, cuisines.loc[cuisine_name, "uuid:ID(Cuisine)"]]
        for restaurant_id, served in zip(df['uuid:ID(Restaurant)'], df['cuisines'])
        for cuisine_name in served
    ],
    columns=[':START_ID(Restaurant)', ':END_ID(Cuisine)'],
)
cuisine_restaurant.to_csv("./cuisine_restaurant.csv")

### Dish Nodes

In [None]:
# Dish nodes: split each restaurant's `menu_item` text on commas, then remove
# surrounding brackets, single quotes and spaces from every piece. The lists
# are stored back on df for the relationship cell.
# NOTE: a value such as "[]" or "" produces the single entry '' here, so an
# empty-named dish ends up in the table whenever such a row exists.
df['menu_item'] = df['menu_item'].apply(
    lambda raw: [piece.strip("[]").replace("'", "").strip(" ") for piece in raw.split(',')]
)

# Distinct dish names across all menus.
unique_dishes = list({dish for menu in df['menu_item'].tolist() for dish in menu})

# One UUID per dish; indexed by name for later name -> UUID lookups.
dishes = pd.DataFrame({'name': unique_dishes})
dishes['uuid:ID(Dish)'] = [str(uuid.uuid4()) for _ in dishes.index]
dishes = dishes.set_index('name')
dishes.to_csv("./dishes.csv")

### Restaurant-Dish Relation (Serves Dish)

In [None]:
# SERVES_DISH edges: one row per (restaurant, menu entry) pair.
# Each restaurant UUID is repeated once per entry in its `menu_item` list, and
# the lists are flattened in the same order, keeping both columns aligned.
menu_sizes = df['menu_item'].str.len()
restaurant_dish = pd.DataFrame({
    ':START_ID(Restaurant)': np.repeat(df['uuid:ID(Restaurant)'].values, menu_sizes),
    ':END_ID(Dish)': np.concatenate(df['menu_item'].values),
})

# Swap every dish name for the UUID stored against it in `dishes`.
restaurant_dish[':END_ID(Dish)'] = restaurant_dish[':END_ID(Dish)'].apply(
    lambda dish_name: dishes.loc[dish_name, "uuid:ID(Dish)"]
)
restaurant_dish.to_csv("./restaurant_dish.csv")

### Restaurant-Dish Relation (Famous For)

In [None]:
# FAMOUS_FOR edges: link each restaurant to the dishes named in its
# comma-separated `dish_liked` text, but only where the name matches a dish
# already present in `dishes`.
#
# Unknown names are skipped on purpose (best effort). This is done with an
# explicit lookup rather than a bare `except: pass`, so genuine errors are no
# longer swallowed. Blank tokens are skipped too: an empty `dish_liked` value
# splits into [''], which would otherwise match an empty-named dish and
# produce a meaningless edge.
dish_uuid_by_name = dishes['uuid:ID(Dish)'].to_dict()

liked_dishes_restaurant = []
for restaurant_id, liked_text in zip(df['uuid:ID(Restaurant)'], df['dish_liked']):
    for raw_name in liked_text.split(","):
        dish_name = raw_name.strip()
        if not dish_name:
            continue
        dish_id = dish_uuid_by_name.get(dish_name)
        if dish_id is None:
            # Liked dish does not appear in any menu; no node to link to.
            continue
        liked_dishes_restaurant.append([restaurant_id, dish_id])

liked_dishes_restaurant = pd.DataFrame(
    liked_dishes_restaurant,
    columns=[':START_ID(Restaurant)', ':END_ID(Dish)'],
)
liked_dishes_restaurant.to_csv("./liked_dishes_restaurant.csv")

### To import the CSV files directly into Neo4j with `neo4j-admin import`, run:

In [None]:
# sudo ./bin/neo4j-admin import --database zomato.db  --mode csv --ignore-missing-nodes --multiline-fields=True --ignore-duplicate-nodes --nodes:Location=import/locations.csv --nodes:Restaurant=import/restaurant.csv --nodes:Dish=import/dishes.csv --nodes:Type=import/type.csv --nodes:Cuisine=import/cuisine.csv --relationships:SERVES_CUISINE=import/cuisine_restaurant.csv --relationships:SERVES_DISH=import/restaurant_dish.csv --relationships:FALL_UNDER=import/type_restaurant.csv --relationships:FAMOUS_FOR=import/liked_dishes_restaurant.csv --relationships:IN_AREA=import/restaurant_location.csv