## Création d'un script `Python` qui dialogue avec `Neo4j` et exploite la `GDS`.

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from neo4j import GraphDatabase

Connexion à la base de données `Neo4j`

In [None]:
# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks reproduce the
# values that were previously hard-coded, so a local setup keeps working.
uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "mdp"),
    ),
)

# Fail in this cell (rather than in a later query cell) when the server is
# unreachable or the credentials are rejected.
driver.verify_connectivity()

Fonction qui exécute une requête renvoyant le top 5 des clubs selon la valeur marchande totale de leurs joueurs

In [3]:
def top_5_clubs_by_market_value(driver):
    """Return the five clubs with the highest summed player market value.

    Args:
        driver: An object exposing ``session()`` as a context manager whose
            session provides ``run(query)`` (a Neo4j driver in this notebook).

    Returns:
        A list of ``(clubName, totalMarketValue)`` tuples, in the order the
        query yields them (descending total market value, at most five rows).
    """
    query = """
    MATCH (c:Club)<-[:PLAYS_FOR]-(p:Player)
    RETURN c.name AS clubName, SUM(p.market_value_in_eur) AS totalMarketValue
    ORDER BY totalMarketValue DESC
    LIMIT 5
    """
    with driver.session() as session:
        # Materialise the rows while the session is still open.
        return [
            (row["clubName"], row["totalMarketValue"])
            for row in session.run(query)
        ]

In [None]:
# Fetch the ranking once, then print one "<club>: <value> EUR" line per entry.
clubs_data = top_5_clubs_by_market_value(driver)

print("Top 5 clubs by total market value:")
for entry in clubs_data:
    club, mv = entry
    print(f"{club}: {mv} EUR")

Création de la projection `transfers_graph_weighted` (si elle n'existe pas encore)

In [9]:
# Build the in-memory GDS graph 'transfers_graph_weighted': Club nodes linked
# by transfers, with the transfer fee exposed as the relationship property
# 'weight'.
with driver.session() as session:
    # Projecting onto a name that is already in the graph catalog raises an
    # error, so look the name up first; this keeps the cell safe to re-run.
    graph_exists = session.run(
        "CALL gds.graph.exists('transfers_graph_weighted') YIELD exists RETURN exists"
    ).single()["exists"]

    if not graph_exists:
        # The node and relationship arguments are Cypher queries, which is the
        # signature of `gds.graph.project.cypher`; plain `gds.graph.project`
        # is the native projection and expects labels / relationship types.
        # `.consume()` drains the result so a server-side failure is raised in
        # this cell instead of going unnoticed.
        session.run("""CALL gds.graph.project.cypher(
    'transfers_graph_weighted',
    'MATCH (n) WHERE n:Club RETURN id(n) AS id',
    'MATCH (c1:Club)<-[init:INITIATED_BY]-(t:Transfer)-[final:FINALIZED_BY]->(c2:Club)
    RETURN id(c1) AS source, id(c2) AS target, t.transfer_fee AS weight'
  )""").consume()

Fonction qui lance l'algorithme `pageRank` de la `GDS`

In [10]:
def run_pagerank(driver):
    """Run weighted PageRank on the transfers projection and return the top clubs.

    Two queries are issued on the same session: the first calls
    ``gds.pageRank.write`` on the 'transfers_graph_weighted' projection and
    stores the score in the ``pagerankWeighted`` node property; the second
    reads back the five clubs with the highest score together with the summed
    ``market_value_in_eur`` of their players.

    Args:
        driver: An object exposing ``session()`` as a context manager whose
            session provides ``run(query)`` (a Neo4j driver in this notebook).

    Returns:
        A list of dicts with the keys ``clubName``, ``pagerankScore`` and
        ``totalMarketValue``; a falsy total (e.g. ``None``) is reported as 0.
    """
    with driver.session() as session:
        # Step 1: compute the scores and persist them on the nodes.
        session.run("""
            CALL gds.pageRank.write('transfers_graph_weighted', {
                relationshipWeightProperty: 'weight',
                writeProperty: 'pagerankWeighted'
            })
        """)

        # Step 2: read the five best-ranked clubs and their total player value.
        ranked_clubs = session.run("""
            MATCH (c:Club)
            // Récupération de la somme de market_value_in_eur via un MATCH optionnel
            OPTIONAL MATCH (c)<-[:PLAYS_FOR]-(p:Player)
            WITH c, c.pagerankWeighted AS score, SUM(p.market_value_in_eur) AS totalValue
            ORDER BY score DESC
            LIMIT 5
            RETURN c.name AS clubName, score, totalValue
        """)

        # Materialise the rows while the session is still open.
        return [
            {
                "clubName": rec["clubName"],
                "pagerankScore": rec["score"],
                "totalMarketValue": rec["totalValue"] or 0,
            }
            for rec in ranked_clubs
        ]

In [None]:
# Run PageRank once and keep the rows; the plotting cells reuse `pagerank_data`.
pagerank_data = run_pagerank(driver)

print("Top clubs by PageRank Weighted + total market value:")
for row in pagerank_data:
    summary = f"{row['clubName']}: PageRank={row['pagerankScore']}, totalValue={row['totalMarketValue']}"
    print(summary)

Deux graphiques l’un sous l’autre pour voir si un club “top PageRank” est aussi “top en valeur”, ou pas

In [None]:
# One row per club, ordered by descending PageRank so both panels share the
# same left-to-right club order.
df1 = pd.DataFrame(pagerank_data).sort_values(by="pagerankScore", ascending=False)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 8))

# (column to plot, bar colour, panel title, y-axis label) for each stacked panel.
panels = [
    ("pagerankScore", "steelblue", "Top Clubs - PageRank Score", "PageRank"),
    ("totalMarketValue", "darkorange", "Top Clubs - Total Market Value", "Market Value (EUR)"),
]
for ax, (column, bar_color, title, y_label) in zip(axes, panels):
    ax.bar(df1["clubName"], df1[column], color=bar_color)
    ax.set_title(title)
    ax.set_ylabel(y_label)
    # Club names are long; tilt them so they do not overlap.
    ax.tick_params(axis='x', labelrotation=45)

plt.tight_layout()
plt.show()

Corrélation entre `PageRank` et `MarketValue`

In [None]:
# Fresh frame built from the PageRank rows, with the market value rescaled
# from EUR to millions of EUR for a readable y-axis.
df2 = pd.DataFrame(pagerank_data).assign(
    totalMarketValue=lambda frame: frame["totalMarketValue"] / 1_000_000
)

plt.figure(figsize=(8,6))
plt.scatter(df2["pagerankScore"], df2["totalMarketValue"], color="green")

# Label every point with its club name, anchored just above and to the right.
for score, value_millions, club_name in zip(
    df2["pagerankScore"], df2["totalMarketValue"], df2["clubName"]
):
    plt.text(score, value_millions, club_name,
             fontsize=9, ha='left', va='bottom')

plt.xlabel("PageRank Score")
plt.ylabel("Market Value (millions EUR)")
plt.title("Club: PageRank vs. Market Value")
plt.show()
