### Import Libraries

In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from langchain_google_genai import ChatGoogleGenerativeAI

import pandas as pd
import numpy as np
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq
from langchain.chains import GraphCypherQAChain

### Load Secrets from .env

In [None]:
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
    api_key=os.getenv('GEMINI_API_KEY')
)

neo4j_url = os.getenv("NEO4J_URI")
neo4j_user = os.getenv("NEO4J_USERNAME")
neo4j_password = os.getenv("NEO4J_PASSWORD")
graph = Neo4jGraph(url=neo4j_url,username=neo4j_user,password=neo4j_password,sanitize=True,enhanced_schema=True)

  from .autonotebook import tqdm as notebook_tqdm


### Connect to Neo4j AuraDB

In [5]:
graph.refresh_schema()
print(graph.schema)



Node properties:
- **Player**
  - `name`: STRING Example: "Ronaldinho"
  - `age`: FLOAT Min: 16.0, Max: 44.0
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max: 350000.0
  - `release_clause`: FLOAT Min: 0.0, Max: 3.561E8
- **Club**
  - `name`: STRING Example: "Querétaro"
- **Foot**
  - `type`: STRING Available options: ['Right', 'Left']
- **Position**
  - `name`: STRING Example: "CAM"
- **Role**
  - `name`: STRING Example: "Central Attacking Midfielder"
- **Performance**
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
- **Financials**
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max: 350000.0
  - `release_clause`: FLOAT Min: 0.0, Max: 3.561E8
- **AgeGroup**
  - `name`: STRING Available options: ['Veteran', 'Teena

### Zero shot prompting

In [6]:
cypher_model = ChatGroq(temperature=0, model_name="llama-3.1-8b-instant", groq_api_key = os.getenv("GROQ_API_KEY"))
# qa_model = ChatGroq(temperature=0, model_name="llama-3.1-70b-versatile", groq_api_key = groq_api_key)
qa_model = ChatGroq(temperature=0, model_name="gemma2-9b-it", groq_api_key = os.getenv("GROQ_API_KEY"))
# qa_model = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key = groq_api_key)

In [7]:
questions = ["List all players who play for FC Barcelona?",
             'List players with higher wage',
             'Give me some players whos playing position is CF',
             'Give me some players whos playing position is CAM and plays for FC barcelona',
             'list me some bayern players',
             'list me some right foot players',
             'Which players have a potential rating of 85 or higher?',
             'What are the best positions of players in Borussia Dortmund?']

In [8]:
chain = GraphCypherQAChain.from_llm(graph=graph,
                                    cypher_llm = cypher_model,
                                    qa_llm= qa_model,
                                    verbose=True,
                                    validate_cypher = True,
                                    allow_dangerous_requests=True
                                    # use_function_response = True
                                    )
results=[]
for q in questions:
    print('====== START ======')
    try:
        print(q)
        result = chain.invoke(q)['result']
        results.append(result)
        print(result)
    except:
        pass
    print('====== END ====== \n')

List all players who play for FC Barcelona?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Player)-[:PLAYS_FOR]->(c:Club) WHERE c.name = "FC Barcelona" RETURN p[0m
Full Context:
[32;1m[1;3m[{'p': {'name': 'Vitor Roque', 'overall_rating': 76.0, 'potential': 88.0, 'best_overall': 78.0, 'value': 17500000.0, 'age': 18.0, 'release_clause': 39400000.0, 'wage': 44000.0}}, {'p': {'name': 'Pau Cubarsí', 'overall_rating': 64.0, 'potential': 83.0, 'best_overall': 66.0, 'value': 1400000.0, 'age': 16.0, 'release_clause': 3700000.0, 'wage': 1000.0}}, {'p': {'name': 'Balde', 'overall_rating': 81.0, 'potential': 89.0, 'best_overall': 81.0, 'value': 53000000.0, 'age': 19.0, 'release_clause': 119300000.0, 'wage': 70000.0}}, {'p': {'name': 'R. Lewandowski', 'overall_rating': 90.0, 'potential': 90.0, 'best_overall': 90.0, 'value': 58000000.0, 'age': 34.0, 'release_clause': 118900000.0, 'wage': 340000.0}}, {'p': {'name': 'Pedri', 'overall_rating': 86.0, 'pot

In [9]:
for question,result in zip(questions, results):
    print("Question: ",question)
    print("Response: ",result)

Question:  List all players who play for FC Barcelona?
Response:  Vitor Roque, Pau Cubarsí, Balde, R. Lewandowski, Pedri, F. de Jong, Gavi, R. Araujo, Fermín, Raphinha 

Question:  List players with higher wage
Response:  K. De Bruyne has a higher wage.  

Question:  Give me some players whos playing position is CF
Response:  I don't know the answer. 

Question:  Give me some players whos playing position is CAM and plays for FC barcelona
Response:  I don't know the answer. 

Question:  list me some bayern players
Response:  I don't know the answer. 

Question:  list me some right foot players
Response:  Ronaldinho, Brahim, K. Wätjen, J. Bellingham, A. Isak, M. Reus, L. Stergiou, W. Zaïre-Emery, C. Hudson-Odoi, V. Gyökeres 

Question:  Which players have a potential rating of 85 or higher?
Response:  K. Havertz 

Question:  What are the best positions of players in Borussia Dortmund?
Response:  I don't know the answer. 



### Few shot prompt template

In [10]:
examples = [
    {
        "question": "Which players have an overall rating above 80?",
        "query": "MATCH (p:Player) WHERE p.overall_rating > 80 RETURN p.name"
    },
    {
        "question": "What is the team of the player named Ronaldinho?",
        "query": "MATCH (p:Player {{name: 'Ronaldinho'}})-[:PLAYS_FOR]->(t:Team) RETURN t.name"
    },
    {
        "question": "List all players who play for Real Madrid?",
        "query": "MATCH (p:Player)-[:PLAYS_FOR]->(t:Team {{name: 'Real Madrid'}}) RETURN p.name"
    },
    {
        "question": "Which players are left-footed?",
        "query": "MATCH (p:Player)-[:USES_FOOT]->(f:Foot {{type: 'Left'}}) RETURN p.name"
    },
    {
        "question": "What are the best positions of players in Borussia Dortmund?",
        "query": "MATCH (p:Player)-[:PLAYS_FOR]->(t:Team {{name: 'Borussia Dortmund'}}), (p)-[:BEST_POSITION]->(pos:Position) RETURN p.name, pos.name"
    },
    {
        "question": "Which players have a potential rating of 85 or higher?",
        "query": "MATCH (p:Player) WHERE p.potential >= 85 RETURN p.name"
    },
    {
        "question": "What is the release clause of players whose value exceeds 10 million?",
        "query": "MATCH (p:Player) WHERE p.value > 10000000 RETURN p.name, p.release_clause"
    },
    {
        "question": "List all players who are 20 years old or younger and play for Ipswich Town.",
        "query": "MATCH (p:Player)-[:PLAYS_FOR]->(t:Team {{name: 'Ipswich Town'}}) WHERE p.age <= 20 RETURN p.name"
    },
    {
        "question": "Which players have 'Central Attacking Midfielder' as their position expansion?",
        "query": "MATCH (p:Player)-[:POSITION_EXPANDED_AS]->(pe:PositionExpansion {{description: 'Central Attacking Midfielder'}}) RETURN p.name"
    },
    {
        "question": "Which players play for teams based in Spain?",
        "query": "MATCH (p:Player)-[:PLAYS_FOR]->(t:Team) WHERE t.name IN ['Real Madrid'] RETURN p.name"
    },
    {
        "question": "Which players are right-footed and have a wage above 50,000?",
        "query": "MATCH (p:Player)-[:USES_FOOT]->(f:Foot {{type: 'Right'}}) WHERE p.wage > 50000 RETURN p.name"
    },
    {
        "question": "Who are the players aged 25 or older with a best overall rating below 70?",
        "query": "MATCH (p:Player) WHERE p.age >= 25 AND p.best_overall < 70 RETURN p.name"
    },
    {
        "question": "What is the best position of the youngest player in the dataset?",
        "query": "MATCH (p:Player)-[:BEST_POSITION]->(pos:Position) RETURN p.name, pos.name ORDER BY p.age ASC LIMIT 1"
    },
    {
        "question": "What is the overall rating and team of players named Brahim?",
        "query": "MATCH (p:Player {{name: 'Brahim'}})-[:PLAYS_FOR]->(t:Team) RETURN p.name, p.overall_rating, t.name"
    },
    {
        "question": "How many players are there with an overall rating above 70 and potential above 80?",
        "query": "MATCH (p:Player) WHERE p.overall_rating > 70 AND p.potential > 80 RETURN COUNT(p) AS TotalPlayers"
    }
]

In [11]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples[:3],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [12]:
print(prompt.format(question="Name 3 FC barcelona players", schema=graph.schema))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
Node properties:
- **Player**
  - `name`: STRING Example: "Ronaldinho"
  - `age`: FLOAT Min: 16.0, Max: 44.0
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max: 350000.0
  - `release_clause`: FLOAT Min: 0.0, Max: 3.561E8
- **Club**
  - `name`: STRING Example: "Querétaro"
- **Foot**
  - `type`: STRING Available options: ['Right', 'Left']
- **Position**
  - `name`: STRING Example: "CAM"
- **Role**
  - `name`: STRING Example: "Central Attacking Midfielder"
- **Performance**
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
- **Financials**
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max

In [13]:
chain_with_few_shot = GraphCypherQAChain.from_llm(graph=graph,
                                                  cypher_llm=cypher_model,
                                                  qa_llm=qa_model,
                                                  cypher_prompt=prompt, # add this
                                                  verbose=True,
                                                  validate_cypher = True,
                                                  allow_dangerous_requests=True
                                                  )
results=[]
for q in questions:
    print("\n", q)
    try:
        result = chain_with_few_shot.invoke(q)['result']
        results.append(result)
        print(result)
    except:
        pass


 List all players who play for FC Barcelona?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mBased on the provided schema and the given examples, I can create a Cypher query for the user input "List all players who play for FC Barcelona".

Since the schema defines a node type 'Club' instead of 'Team', the correct query should be:

Cypher query: MATCH (p:Player)-[:PLAYS_FOR]->(c:Club {name: 'FC Barcelona'}) RETURN p.name[0m

 List players with higher wage


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Player)-[:HAS_FINANCIALS]->(f:Financials)
RETURN p.name, f.wage
ORDER BY f.wage DESC
[0m
Full Context:
[32;1m[1;3m[{'p.name': 'K. De Bruyne', 'f.wage': 350000.0}, {'p.name': 'E. Haaland', 'f.wage': 340000.0}, {'p.name': 'R. Lewandowski', 'f.wage': 340000.0}, {'p.name': 'Vini Jr.', 'f.wage': 310000.0}, {'p.name': 'F. Valverde', 'f.wage': 270000.0}, {'p.name': 'Bernardo Silva', 'f.wage': 270000.0}, 

In [14]:
for question,result in zip(questions, results):
    print("Question: ",question)
    print("Response: ",result)

Question:  List all players who play for FC Barcelona?
Response:  K. De Bruyne has a higher wage.  

Question:  List players with higher wage
Response:  W. Ben Yedder, Iago Aspas  

Question:  Give me some players whos playing position is CF
Response:  Fermín, João Félix, Marc Casadó, and Xavi. 

Question:  Give me some players whos playing position is CAM and plays for FC barcelona
Response:  I don't know the answer. 

Question:  list me some bayern players
Response:  Ronaldinho, Brahim, K. Wätjen, J. Bellingham, A. Isak, M. Reus, L. Stergiou, W. Zaïre-Emery, C. Hudson-Odoi, V. Gyökeres 

Question:  list me some right foot players
Response:  I don't know the answer. 

Question:  Which players have a potential rating of 85 or higher?
Response:  I don't know the answer. 



## Dynamic Few Shot Prompting

In [15]:
from langchain_community.vectorstores import Neo4jVector
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_community.embeddings import HuggingFaceEmbeddings

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    HuggingFaceEmbeddings(),
    Neo4jVector,
    url = neo4j_url,
    username = neo4j_user,
    password = neo4j_password,
    k=3,
    input_keys=["question"],
)

  HuggingFaceEmbeddings(),
  HuggingFaceEmbeddings(),


In [16]:

dynamic_prompt = FewShotPromptTemplate(
    example_selector=example_selector, #previous: examples = examples[:3]
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries. Don't add any preambles, just return the correct cypher query",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [17]:
print(dynamic_prompt.format(question="Where does Ansu Fati play?", schema= graph.schema))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
Node properties:
- **Player**
  - `name`: STRING Example: "Ronaldinho"
  - `age`: FLOAT Min: 16.0, Max: 44.0
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max: 350000.0
  - `release_clause`: FLOAT Min: 0.0, Max: 3.561E8
- **Club**
  - `name`: STRING Example: "Querétaro"
- **Foot**
  - `type`: STRING Available options: ['Right', 'Left']
- **Position**
  - `name`: STRING Example: "CAM"
- **Role**
  - `name`: STRING Example: "Central Attacking Midfielder"
- **Performance**
  - `overall_rating`: FLOAT Min: 47.0, Max: 91.0
  - `potential`: FLOAT Min: 54.0, Max: 94.0
  - `best_overall`: FLOAT Min: 49.0, Max: 93.0
- **Financials**
  - `value`: FLOAT Min: 0.0, Max: 1.85E8
  - `wage`: FLOAT Min: 0.0, Max

In [18]:
chain_with_dynamic_few_shot = GraphCypherQAChain.from_llm(graph=graph,
                                                          cypher_llm=cypher_model,
                                                          qa_llm=qa_model,
                                                          cypher_prompt=dynamic_prompt, # don't forget to change this into the dynamic_prompt
                                                          verbose=True,
                                                          validate_cypher = True,
                                                          allow_dangerous_requests=True
                                                          )
results=[]
for q in questions:
    print("\n", q)
    try:
        result = chain_with_dynamic_few_shot.invoke(q)['result']
        results.append(result)
        print(result)
    except:
        pass


 List all players who play for FC Barcelona?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Player)-[:PLAYS_FOR]->(t:Club {name: 'FC Barcelona'}) RETURN p.name[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Vitor Roque'}, {'p.name': 'Pau Cubarsí'}, {'p.name': 'Balde'}, {'p.name': 'R. Lewandowski'}, {'p.name': 'Pedri'}, {'p.name': 'F. de Jong'}, {'p.name': 'Gavi'}, {'p.name': 'R. Araujo'}, {'p.name': 'Fermín'}, {'p.name': 'Raphinha'}][0m

[1m> Finished chain.[0m
Vitor Roque, Pau Cubarsí, Balde, R. Lewandowski, Pedri, F. de Jong, Gavi, R. Araujo, Fermín, Raphinha  


 List players with higher wage


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p1:Player)-[:HAS_FINANCIALS]->(f1:Financials), (p2:Player)-[:HAS_FINANCIALS]->(f2:Financials) WHERE f1.wage > f2.wage RETURN p1.name[0m

 Give me some players whos playing position is CF


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:


In [19]:
for question,result in zip(questions, results):
    print("Question: ",question)
    print("Response: ",result)

Question:  List all players who play for FC Barcelona?
Response:  Vitor Roque, Pau Cubarsí, Balde, R. Lewandowski, Pedri, F. de Jong, Gavi, R. Araujo, Fermín, Raphinha  

Question:  List players with higher wage
Response:  L. Trossard, M. Depay, W. Ben Yedder, Iago Aspas, Diogo Jota, F. Thauvin, D. Kyereh, L. Vietto  

Question:  Give me some players whos playing position is CF
Response:  Fermín, João Félix, Marc Casadó, and Xavi. 

Question:  Give me some players whos playing position is CAM and plays for FC barcelona
Response:  I don't know the answer. 

Question:  list me some bayern players
Response:  Ronaldinho, Brahim, K. Wätjen, J. Bellingham, A. Isak, M. Reus, L. Stergiou, W. Zaïre-Emery, C. Hudson-Odoi, V. Gyökeres 

Question:  list me some right foot players
Response:  I don't know the answer. 

Question:  Which players have a potential rating of 85 or higher?
Response:  CAM, ST, LM, LWB are the best positions of players in Borussia Dortmund. 

