In [3]:
import json
import re
import unicodedata
import pandas as pd
import neo4j
import time
from neo4j import GraphDatabase

In [4]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [5]:
conn = Neo4jConnection(uri="neo4j://172.16.1.128:7687", 
                       user="",              
                       pwd="")

### Injesting data into neo4j

In [17]:
#function to insert data in batches in neo4j
def insert_data(query, rows, batch_size = 10000):
    
    total = 0
    batch = 0
    start = time.time()
    result = None
    
    while batch * batch_size < len(rows):
        res = conn.query(query, 
                         parameters = {'rows': rows[batch*batch_size:(batch+1)*batch_size].to_dict('records')})
        try:
            total += res[0]['total']
            batch += 1
            result = {"total":total, 
                      "batches_done":batch,
                      "total_batches": len(rows)/batch_size,
                      "time":time.time()-start}
            print(result)
        except Exception as e:
            print(e)
            batch += 1
        
    return result

##### Ingesting nodes

In [31]:
## Applying Constraints

# constraint_query = "CREATE CONSTRAINT FOR (m:Recipe) REQUIRE m.recipe_name IS UNIQUE;"
# conn.query(constraint_query)

In [32]:
indian_food_dataset = pd.read_csv('input_data/indian_food_dataset.csv')
indian_food_dataset.rename(columns={'Ingredient-count':'ingridient_number'},inplace=True) # neo4j was mistaking count as its inbuilt feature
indian_food_dataset.head()


Unnamed: 0,TranslatedRecipeName,TranslatedIngredients,TotalTimeInMins,Cuisine,TranslatedInstructions,URL,Cleaned-Ingredients,image-url,ingridient_number
0,Masala Karela Recipe,"1 tablespoon Red Chilli powder,3 tablespoon Gr...",45,Indian,"To begin making the Masala Karela Recipe,de-se...",https://www.archanaskitchen.com/masala-karela-...,"salt,amchur (dry mango powder),karela (bitter ...",https://www.archanaskitchen.com/images/archana...,10
1,Spicy Tomato Rice (Recipe),"2 teaspoon cashew - or peanuts, 1/2 Teaspoon ...",15,South Indian Recipes,"To make tomato puliogere, first cut the tomato...",https://www.archanaskitchen.com/spicy-tomato-r...,"tomato,salt,chickpea lentils,green chilli,rice...",https://www.archanaskitchen.com/images/archana...,12
2,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1 Onion - sliced,1 teaspoon White Urad Dal (Sp...",50,South Indian Recipes,"To begin making the Ragi Vermicelli Recipe, fi...",https://www.archanaskitchen.com/ragi-vermicell...,"salt,rice vermicelli noodles (thin),asafoetida...",https://www.archanaskitchen.com/images/archana...,12
3,Gongura Chicken Curry Recipe - Andhra Style Go...,"1/2 teaspoon Turmeric powder (Haldi),1 tablesp...",45,Andhra,To begin making Gongura Chicken Curry Recipe f...,https://www.archanaskitchen.com/gongura-chicke...,"tomato,salt,ginger,sorrel leaves (gongura),fen...",https://www.archanaskitchen.com/images/archana...,15
4,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"oil - as per use, 1 tablespoon coriander seed...",30,Andhra,"To make Andhra Style Alam Pachadi, first heat ...",https://www.archanaskitchen.com/andhra-style-a...,"tomato,salt,ginger,red chillies,curry,asafoeti...",https://www.archanaskitchen.com/images/archana...,12


**Ingesting Recipe node**

In [33]:
def add_recipe(rows, batch_size=10000):
    query = '''
            UNWIND $rows AS row
            MERGE (m:Recipe {recipe_name: row.TranslatedRecipeName,
                making_instruction:row.TranslatedInstructions,
                total_time_taken:row.TotalTimeInMins,
                total_ingridients:toInteger(row.ingridient_number)})
            RETURN count(*) as total
            '''
    return insert_data(query, rows, batch_size)


In [34]:
add_recipe(indian_food_dataset)

{'total': 5938, 'batches_done': 1, 'total_batches': 0.5938, 'time': 12.913150548934937}


{'total': 5938,
 'batches_done': 1,
 'total_batches': 0.5938,
 'time': 12.913150548934937}

**Ingesting Ingridients nodes and relationship**

In [35]:
ingridients_dataset = pd.read_csv('formatted_data/processed_ingridients_df.csv')
ingridients_dataset.head()

Unnamed: 0,TranslatedRecipeName,ingrident,ingredient_relation
0,Masala Karela Recipe,salt,salt - to taste
1,Masala Karela Recipe,amchur (dry mango powder),1 tablespoon amchur (dry mango powder)
2,Masala Karela Recipe,red chilli powder,1 tablespoon red chilli powder
3,Masala Karela Recipe,gram flour (besan),3 tablespoon gram flour (besan)
4,Masala Karela Recipe,onion,1 onion - thinly sliced


In [36]:
unique_ingridients_list = ingridients_dataset['ingrident'].unique()
unique_ingridients_dataframe = pd.DataFrame(unique_ingridients_list,columns=['ingridient_name'])
unique_ingridients_dataframe

Unnamed: 0,ingridient_name
0,salt
1,amchur (dry mango powder)
2,red chilli powder
3,gram flour (besan)
4,onion
...,...
1399,black soy beans
1400,gram vivatta maida
1401,sarsaparilla (mahali root)
1402,jal jeera powder


In [37]:
def add_ingridient_node(rows, batch_size=500):
    query = '''
            UNWIND $rows AS row
            MERGE (m:Ingridient {ingrident_name: row.ingridient_name})
            RETURN count(*) as total
            '''
    return insert_data(query, rows, batch_size)


In [38]:
# add_ingridient_node(unique_ingridients_dataframe)

{'total': 500, 'batches_done': 1, 'total_batches': 2.808, 'time': 0.11142325401306152}
{'total': 1000, 'batches_done': 2, 'total_batches': 2.808, 'time': 0.36146974563598633}
{'total': 1404, 'batches_done': 3, 'total_batches': 2.808, 'time': 0.6804018020629883}


{'total': 1404,
 'batches_done': 3,
 'total_batches': 2.808,
 'time': 0.6804018020629883}

In [41]:
def add_directors_relation(rows, batch_size=10000):
    query = '''
            UNWIND $rows AS row
            
            WITH row
            MATCH (m:Recipe {recipe_name: row.TranslatedRecipeName})
            MATCH (a:Ingridient {ingrident_name: row.ingridient_name})
            MERGE (m)-[r:HAS_INGRIDIENT]->(a)
            SET r.ingridient_quantity = row.ingredient_relation
            
            RETURN count(distinct a) as total
            '''
    return insert_data(query, rows, batch_size)


In [42]:
add_directors_relation(ingridients_dataset)

{'total': 0, 'batches_done': 1, 'total_batches': 6.637, 'time': 5.60310959815979}
{'total': 0, 'batches_done': 2, 'total_batches': 6.637, 'time': 11.233957052230835}
{'total': 0, 'batches_done': 3, 'total_batches': 6.637, 'time': 16.88274645805359}
{'total': 0, 'batches_done': 4, 'total_batches': 6.637, 'time': 22.58536410331726}
{'total': 0, 'batches_done': 5, 'total_batches': 6.637, 'time': 28.10410189628601}
{'total': 0, 'batches_done': 6, 'total_batches': 6.637, 'time': 33.637457847595215}
{'total': 0, 'batches_done': 7, 'total_batches': 6.637, 'time': 37.23239731788635}


{'total': 0,
 'batches_done': 7,
 'total_batches': 6.637,
 'time': 37.23239731788635}