In [1]:
from IPython.display import clear_output
%pip install langchain langchain-community langchain-groq langchain-experimental neo4j groq
clear_output()

In [3]:
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq

from dotenv import load_dotenv
import os, re
from neo4j import GraphDatabase
import pandas as pd
from groq import Groq

# Run load_dotenv() first so that whatever it loads is visible to the
# environment lookups made below and in the helper functions.
load_dotenv()

# Chat model handed to the question-answering chain later in the notebook.
# Temperature is pinned to 0 and the API key is looked up under GROQ_API_KEY.
llm = ChatGroq(
    model='llama-3.1-70b-versatile',
    temperature=0,
    api_key=os.getenv('GROQ_API_KEY'),
)
def reset():
    """Drop and recreate the ``neo4j`` database, erasing everything stored in it.

    Connection settings are read from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables. Both admin commands are issued in a
    session opened against the ``system`` database.

    The driver is closed in a ``finally`` clause, so a failing command no
    longer leaves the connection open.
    """
    db = GraphDatabase.driver(
        uri=os.environ.get('NEO4J_URI'),
        auth=(os.environ.get('NEO4J_USERNAME'), os.environ.get('NEO4J_PASSWORD')),
    )
    try:
        with db.session(database='system') as session:
            session.run("""DROP DATABASE neo4j""")
            session.run('''CREATE DATABASE neo4j''')
    finally:
        # Always release the driver, even when DROP/CREATE raised.
        db.close()

def extract_query(input_string):
    """Pull the code out of a model reply that may wrap it in ``` fences.

    Args:
        input_string: Raw reply text.

    Returns:
        The stripped contents of every non-empty fenced block, joined with
        newlines. A language tag written directly after the opening fence
        (for example ```cypher) is dropped. When the reply contains no
        fenced block at all, the whole reply is returned stripped, since a
        model asked to return only the code may omit the fences.
    """
    # The optional "<lang>\n" right after the opening fence is matched outside
    # the capture group so it never ends up in the extracted query.
    pattern = r'```(?:[A-Za-z0-9_+\-]*[ \t]*\r?\n)?(.*?)```'

    matches = re.findall(pattern, input_string, re.DOTALL)
    if not matches:
        # No fences: treat the entire reply as the code.
        return input_string.strip()

    blocks = [match.strip() for match in matches]
    # Newline separator keeps the last token of one block from fusing with the
    # first token of the next.
    return '\n'.join(block for block in blocks if block)

def get_schema(path):
    """Summarise the columns of a CSV file as one text description.

    Args:
        path: Location of the CSV file. It is passed to ``pd.read_csv`` and
            also concatenated into the result, so it must be a ``str``.

    Returns:
        A string beginning with ``path: <path>, `` followed by one line per
        column giving its name, dtype, whether it holds any nulls, and its
        non-null and null counts.
    """
    frame = pd.read_csv(path)

    def describe(name):
        # Build the summary line for a single column.
        series = frame[name]
        present = series.notnull().sum()
        missing = series.isnull().sum()
        has_nulls = missing > 0
        return f"Column: {name}, Type: {series.dtype}, Nullable: {has_nulls}, Non-Null Count: {present}, Null Count: {missing}"

    column_lines = "\n".join(describe(name) for name in frame.columns)
    return 'path: ' + path + ", " + column_lines

def create_query(path):
    """Ask a Groq-hosted model to write a Neo4j import query for a CSV file.

    The text from ``get_schema(path)`` is sent as the user message, alongside
    a system prompt holding one worked example (a movies schema and a
    ``LOAD CSV`` statement for it). The reply is passed through
    ``extract_query``; the result is printed and returned.

    Args:
        path: CSV location, forwarded to ``get_schema``.

    Returns:
        Whatever ``extract_query`` produced from the model's reply.
    """
    groq_client = Groq()
    csv_schema = get_schema(path=path)

    # One-shot example prompt; its text (including indentation) is part of
    # what the model sees, so it is kept exactly as written.
    system_prompt = """analyse the format given below and create a similer code for the given input.
                                    schema: path: movies.csv, Column: movieId, Type: int64, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: released, Type: object, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: title, Type: object, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: actors, Type: object, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: director, Type: object, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: genres, Type: object, Nullable: False, Non-Null Count: 8964, Null Count: 0\nColumn: imdbRating, Type: float64, Nullable: True, Non-Null Count: 8929, Null Count: 35
                                    sample neo4j code:
                                    LOAD CSV WITH HEADERS FROM 
                                    'file:///movies.csv'
                                    'path to csv file'
                                    AS row
                                    MERGE (m:Movie {id:row.movieId})
                                    SET m.released = date(row.released),
                                        m.title = row.title,
                                        m.imdbRating = toFloat(row.imdbRating)
                                    FOREACH (director in split(row.director, '|') | 
                                        MERGE (p:Person {name:trim(director)})
                                        MERGE (p)-[:DIRECTED]->(m))
                                    FOREACH (actor in split(row.actors, '|') | 
                                        MERGE (p:Person {name:trim(actor)})
                                        MERGE (p)-[:ACTED_IN]->(m))
                                    FOREACH (genre in split(row.genres, '|') | 
                                        MERGE (g:Genre {name:trim(genre)})
                                        MERGE (m)-[:IN_GENRE]->(g))
                                        
                                        Return only the code and nothing else"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"input: {csv_schema}"},
    ]
    response = groq_client.chat.completions.create(
        messages=conversation,
        model="llama3-70b-8192",
    )

    reply_text = response.choices[0].message.content
    query = extract_query(reply_text)
    print(query)
    return query

# Graph handle used by the import and question-answering cells below. No
# arguments are passed, so connection settings presumably come from the
# environment — TODO confirm against the Neo4jGraph documentation.
graph = Neo4jGraph()

In [40]:
# Sample CSV datasets; all three live under the same repository root.
_DATASETS_ROOT = 'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main'

comics = f'{_DATASETS_ROOT}/Marvel/comics.csv'
stocks = f'{_DATASETS_ROOT}/stocks/stock_prices.csv'
movies_small = f'{_DATASETS_ROOT}/movies/movies_small.csv'

In [None]:

# Generate an import query for the local movies CSV, run it against the graph,
# then refresh the graph's schema so later cells see what was imported.
# The path is a raw string: in a plain literal "\P", "\M" and "\m" are invalid
# escape sequences, which newer Python versions warn about. The value itself
# is unchanged.
graph.query(create_query(r'D:\Programming\Projects\Modus_Proj\movies.csv'))
graph.refresh_schema()


In [4]:
# print() rather than a bare expression so the multi-line schema text renders
# with real line breaks.
print(graph.schema)

Node properties:
Movie {imdbRating: FLOAT, id: STRING, released: DATE, title: STRING}
Person {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [5]:
from langchain.chains import GraphCypherQAChain

# Build a question-answering chain over the graph. With verbose=True the
# generated Cypher and its raw result are echoed in the cell output. Then ask
# one sample question and display the answer.
chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, verbose=True)
question = {"query": "when was toy story released?"}
response = chain.invoke(question)
response



> Entering new GraphCypherQAChain chain...
Generated Cypher:
MATCH (m:Movie {title: "Toy Story"}) RETURN m.released
Full Context:
[{'m.released': neo4j.time.Date(1995, 11, 22)}]

> Finished chain.


{'query': 'when was toy story released?',
 'result': 'Toy Story was released on November 22, 1995.'}

In [42]:
# Destructive: reset() drops and recreates the `neo4j` database, so everything
# imported earlier in this notebook is erased. Run only when a clean database
# is wanted.
reset()