### Import Libraries

In [1]:
import pandas as pd
from dotenv import load_dotenv
import os
load_dotenv()

True

### Load Secrets from .env

In [2]:
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USER = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

### Connect to Neo4j AuraDB

In [3]:
from neo4j import GraphDatabase
AUTH = (NEO4J_USER, NEO4J_PASSWORD)

with GraphDatabase.driver(NEO4J_URI, auth=AUTH) as driver:
    driver.verify_connectivity()

## Load the data

In [4]:
player_df=pd.read_excel('../processed_data/Football_players_data.xlsx')
player_df.head(3)

Unnamed: 0,name,Age,Team,Overall rating,Potential,foot,Best overall,Best position,Best position expansion,Value,Wage,Release clause
0,Ronaldinho,34,Querétaro,78,78,Right,77,CAM,Central Attacking Midfielder,4100000,45000,0.0
1,O. Hutchinson,19,Ipswich Town,65,81,Left,67,CAM,Central Attacking Midfielder,1800000,3000,4500000.0
2,Brahim,23,Real Madrid,82,86,Right,84,CAM,Central Attacking Midfielder,43500000,145000,92400000.0


In [5]:
player_df.columns

Index(['name', 'Age', 'Team', 'Overall rating', 'Potential', 'foot',
       'Best overall', 'Best position', 'Best position expansion', 'Value',
       'Wage', 'Release clause'],
      dtype='object')

### Write Cypher query for data ingestion

In [7]:
def insert_player_data(tx, row):
    tx.run('''
        // Create or merge Player node
        MERGE (player:Player {name: $name})
        ON CREATE SET 
            player.age = $Age,
            player.overall_rating = $`Overall rating`,
            player.potential = $Potential,
            player.best_overall = $`Best overall`,
            player.value = $Value,
            player.wage = $Wage,
            player.release_clause = $`Release clause`

        // Create or merge Club node
        MERGE (club:Club {name: $Team})
        
        // Define the relationship between Player and Club
        MERGE (player)-[:PLAYS_FOR {since: date()}]->(club)

        // Create or merge Foot node
        MERGE (foot:Foot {type: $foot})
        
        // Define the relationship between Player and Foot
        MERGE (player)-[:USES_FOOT]->(foot)

        // Create or merge Best Position node
        MERGE (position:Position {name: $`Best position`})
        
        // Define the relationship between Player and Position
        MERGE (player)-[:PLAYS_AS]->(position)

        // Create or merge Role node
        MERGE (role:Role {name: $`Best position expansion`})
        
        // Define the relationship between Player and Role
        MERGE (player)-[:HAS_ROLE]->(role)

        // Add relationships to numeric performance attributes
        MERGE (performance:Performance {
            overall_rating: $`Overall rating`,
            potential: $Potential,
            best_overall: $`Best overall`
        })
        MERGE (player)-[:HAS_PERFORMANCE]->(performance)

        // Add relationships for financial details
        MERGE (financials:Financials {
            value: $Value,
            wage: $Wage,
            release_clause: $`Release clause`
        })
        MERGE (player)-[:HAS_FINANCIALS]->(financials)

        // Create or merge AgeGroup node
        WITH
            CASE
                WHEN $Age < 20 THEN 'Teenager'
                WHEN $Age >= 20 AND $Age <= 25 THEN 'Young Adult'
                WHEN $Age > 25 AND $Age <= 30 THEN 'Prime'
                ELSE 'Veteran'
            END AS age_group
        MERGE (ageGroup:AgeGroup {name: age_group})

        // Define the relationship between Player and AgeGroup
        MERGE (player)-[:BELONGS_TO_AGE_GROUP]->(ageGroup)
    ''', row)

# Iterate through the DataFrame and insert data into Neo4j
with driver.session() as session:
    for _, row in player_df.iterrows():
        # Convert row to dictionary
        row_dict = row.to_dict()
        
        # Ensure numeric fields are properly converted to numbers
        numeric_fields = [
            'Age', 'Overall rating', 'Potential', 'Best overall', 
            'Value', 'Wage', 'Release clause'
        ]
        for field in numeric_fields:
            row_dict[field] = float(row_dict.get(field, 0))  # Default to 0 if value is missing
        
        # Write transaction
        session.execute_write(insert_player_data, row_dict)


  with driver.session() as session:
