In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine, text

# SQLAlchemy engine shared by every cell of the notebook.
# The connection URL is taken from the LOL_DB_URL environment variable when it
# is set, so real credentials do not have to be stored in the notebook; the
# fallback is the original local development URL.
engine = create_engine(
    os.environ.get("LOL_DB_URL", "postgresql://lol_user:lol_pass@localhost:55432/lol_db")
)

In [None]:
# Stage the semicolon-separated CSV into the PostgreSQL table `raw_lol`.
csv_path = "data/lol_dataset.csv"

# Number of CSV rows read and written per batch.
chunksize = 20000

# Every column is read as text (dtype=str); typing is done later in SQL.
# A single transaction wraps the whole load so that a failure part-way through
# rolls back instead of leaving raw_lol dropped or half-filled.
first = True
with engine.begin() as conn:
    for chunk in pd.read_csv(csv_path, sep=";", dtype=str, chunksize=chunksize, low_memory=False):
        # The first batch recreates the table, the following ones append to it.
        chunk.to_sql("raw_lol", conn, if_exists="replace" if first else "append", index=False)
        first = False

    # Without this guard a file that yields no batch would leave any existing
    # raw_lol untouched while still printing the success message below.
    if first:
        raise ValueError(f"No rows read from {csv_path!r}; raw_lol was not loaded")

print("OK: raw_lol chargé")

OK: raw_lol chargé


In [None]:
# DDL for the relational model: games, summoners, champions, participants,
# participant_stats, ranks and champion_mastery. Every statement uses
# CREATE TABLE IF NOT EXISTS, and the whole script is sent in one execute().
schema_ddl = text("""
    CREATE TABLE IF NOT EXISTS games (
      game_id BIGINT PRIMARY KEY,
      game_start_utc TIMESTAMP NULL,
      game_duration INT NULL,
      game_mode TEXT NULL,
      game_type TEXT NULL,
      game_version TEXT NULL,
      map_id INT NULL,
      platform_id TEXT NULL,
      queue_id INT NULL
    );

    CREATE TABLE IF NOT EXISTS summoners (
      puuid TEXT PRIMARY KEY,
      summoner_id TEXT NULL,
      summoner_name TEXT NULL,
      summoner_level INT NULL
    );

    CREATE TABLE IF NOT EXISTS champions (
      champion_id INT PRIMARY KEY,
      champion_name TEXT NULL
    );

    CREATE TABLE IF NOT EXISTS participants (
      game_id BIGINT REFERENCES games(game_id),
      participant_id INT,
      puuid TEXT REFERENCES summoners(puuid),
      champion_id INT REFERENCES champions(champion_id),
      team_id INT NULL,
      win BOOLEAN NULL,
      individual_position TEXT NULL,
      team_position TEXT NULL,
      lane TEXT NULL,
      role TEXT NULL,
      PRIMARY KEY (game_id, participant_id)
    );

    CREATE TABLE IF NOT EXISTS participant_stats (
      game_id BIGINT,
      participant_id INT,
      kills INT NULL,
      deaths INT NULL,
      assists INT NULL,
      baron_kills INT NULL,
      dragon_kills INT NULL,
      gold_earned INT NULL,
      gold_spent INT NULL,
      total_damage_dealt BIGINT NULL,
      total_damage_dealt_to_champions BIGINT NULL,
      physical_damage_dealt_to_champions BIGINT NULL,
      magic_damage_dealt_to_champions BIGINT NULL,
      true_damage_dealt_to_champions BIGINT NULL,
      damage_dealt_to_objectives BIGINT NULL,
      damage_dealt_to_turrets BIGINT NULL,
      total_damage_taken BIGINT NULL,
      physical_damage_taken BIGINT NULL,
      magic_damage_taken BIGINT NULL,
      true_damage_taken BIGINT NULL,
      time_ccing_others BIGINT NULL,
      vision_score INT NULL,
      wards_placed INT NULL,
      wards_killed INT NULL,
      vision_wards_bought_in_game INT NULL,
      item0 INT NULL,
      item1 INT NULL,
      item2 INT NULL,
      item3 INT NULL,
      item4 INT NULL,
      item5 INT NULL,
      item6 INT NULL,
      PRIMARY KEY (game_id, participant_id),
      FOREIGN KEY (game_id, participant_id) REFERENCES participants(game_id, participant_id)
    );

    CREATE TABLE IF NOT EXISTS ranks (
      puuid TEXT REFERENCES summoners(puuid),
      queue_type TEXT, -- 'solo' ou 'flex'
      tier TEXT NULL,
      rank TEXT NULL,
      lp INT NULL,
      wins INT NULL,
      losses INT NULL,
      PRIMARY KEY (puuid, queue_type)
    );

    CREATE TABLE IF NOT EXISTS champion_mastery (
      puuid TEXT REFERENCES summoners(puuid),
      champion_id INT REFERENCES champions(champion_id),
      champion_mastery_level INT NULL,
      champion_mastery_points BIGINT NULL,
      champion_mastery_lastPlayTime_utc TIMESTAMP NULL,
      champion_mastery_pointsSinceLastLevel BIGINT NULL,
      champion_mastery_pointsUntilNextLevel BIGINT NULL,
      champion_mastery_tokensEarned INT NULL,
      PRIMARY KEY (puuid, champion_id)
    );
    """)

# engine.begin() commits when the block exits normally and rolls back on error.
with engine.begin() as schema_conn:
    schema_conn.execute(schema_ddl)

print("OK: schéma créé")


OK: schéma créé


In [None]:
from sqlalchemy import text

# Populate the relational model from the all-text staging table raw_lol.
# The whole script is sent in a single execute() and committed once at the end.
#
# `win` parsing: the text is trimmed and lower-cased, then compared with explicit
# TRUE and FALSE token lists; anything else (including NULL and '') is stored as
# NULL. The previous case-sensitive list mapped every unrecognised spelling to
# FALSE, so a value such as 'TRUE' was silently recorded as a loss.
# NOTE(review): the 'vrai'/'faux' and 'win'/'fail' tokens are guesses at what the
# CSV export may contain - confirm against the actual file.
# NOTE: participants uses ON CONFLICT DO NOTHING, so rows inserted by an earlier
# run keep their stored `win` value until they are deleted and reloaded.
with engine.connect() as conn:
    conn.execute(text("""
    /* =========================================================
       CELLULE 4 - TRANSFORMATION RAW -> MODELE RELATIONNEL (ROBUSTE)
       - Casts sécurisés via NULLIF(...,'')
       - Filtres pour éviter les FK cassées (puuid, champion_id)
       - DISTINCT ON + ORDER BY pour ranks (évite CardinalityViolation)
       - participant_stats en DO UPDATE pour rejouabilité
       ========================================================= */

    /* =======================
       GAMES
       ======================= */
    INSERT INTO games
    SELECT DISTINCT
      NULLIF(game_id,'')::BIGINT,
      NULLIF(game_start_utc,'')::TIMESTAMP,
      NULLIF(game_duration,'')::INT,
      game_mode,
      game_type,
      game_version,
      NULLIF(map_id,'')::INT,
      platform_id,
      NULLIF(queue_id,'')::INT
    FROM raw_lol
    WHERE NULLIF(game_id,'') IS NOT NULL
    ON CONFLICT (game_id) DO NOTHING;

    /* =======================
       SUMMONERS
       ======================= */
    INSERT INTO summoners
    SELECT DISTINCT
      puuid,
      summoner_id,
      summoner_name,
      NULLIF(summoner_level,'')::INT
    FROM raw_lol
    WHERE puuid IS NOT NULL AND puuid <> ''
    ON CONFLICT (puuid) DO NOTHING;

    /* =======================
       CHAMPIONS
       ======================= */
    INSERT INTO champions
    SELECT DISTINCT
      NULLIF(champion_id,'')::INT,
      champion_name
    FROM raw_lol
    WHERE NULLIF(champion_id,'') IS NOT NULL
    ON CONFLICT (champion_id) DO NOTHING;

    /* =======================
       PARTICIPANTS
       - On exige puuid (FK summoners)
       - champion_id peut être NULL (autorisé), sinon FK champions
       ======================= */
    INSERT INTO participants
    SELECT DISTINCT
      NULLIF(game_id,'')::BIGINT,
      NULLIF(participant_id,'')::INT,
      puuid,
      NULLIF(champion_id,'')::INT,
      NULLIF(team_id,'')::INT,
      CASE
        WHEN lower(btrim(win)) IN ('true','t','1','1.0','yes','y','win','vrai') THEN TRUE
        WHEN lower(btrim(win)) IN ('false','f','0','0.0','no','n','fail','faux') THEN FALSE
        ELSE NULL
      END,
      individual_position,
      team_position,
      lane,
      role
    FROM raw_lol
    WHERE NULLIF(game_id,'') IS NOT NULL
      AND NULLIF(participant_id,'') IS NOT NULL
      AND puuid IS NOT NULL AND puuid <> ''
    ON CONFLICT (game_id, participant_id) DO NOTHING;

    /* =======================
       PARTICIPANT STATS
       - DO UPDATE pour rejouer l'ETL si besoin
       ======================= */
    INSERT INTO participant_stats (
      game_id, participant_id,
      kills, deaths, assists,
      baron_kills, dragon_kills,
      gold_earned, gold_spent,
      total_damage_dealt, total_damage_dealt_to_champions,
      physical_damage_dealt_to_champions, magic_damage_dealt_to_champions, true_damage_dealt_to_champions,
      damage_dealt_to_objectives, damage_dealt_to_turrets,
      total_damage_taken, physical_damage_taken, magic_damage_taken, true_damage_taken,
      time_ccing_others, vision_score,
      wards_placed, wards_killed, vision_wards_bought_in_game,
      item0, item1, item2, item3, item4, item5, item6
    )
    SELECT DISTINCT
      NULLIF(game_id,'')::BIGINT,
      NULLIF(participant_id,'')::INT,

      NULLIF(kills,'')::INT,
      NULLIF(deaths,'')::INT,
      NULLIF(assists,'')::INT,

      NULLIF(baron_kills,'')::INT,
      NULLIF(dragon_kills,'')::INT,

      NULLIF(gold_earned,'')::INT,
      NULLIF(gold_spent,'')::INT,

      NULLIF(total_damage_dealt,'')::BIGINT,
      NULLIF(total_damage_dealt_to_champions,'')::BIGINT,

      NULLIF(physical_damage_dealt_to_champions,'')::BIGINT,
      NULLIF(magic_damage_dealt_to_champions,'')::BIGINT,
      NULLIF(true_damage_dealt_to_champions,'')::BIGINT,

      NULLIF(damage_dealt_to_objectives,'')::BIGINT,
      NULLIF(damage_dealt_to_turrets,'')::BIGINT,

      NULLIF(total_damage_taken,'')::BIGINT,
      NULLIF(physical_damage_taken,'')::BIGINT,
      NULLIF(magic_damage_taken,'')::BIGINT,
      NULLIF(true_damage_taken,'')::BIGINT,

      NULLIF(time_ccing_others,'')::BIGINT,
      NULLIF(vision_score,'')::INT,

      NULLIF(wards_placed,'')::INT,
      NULLIF(wards_killed,'')::INT,
      NULLIF(vision_wards_bought_in_game,'')::INT,

      NULLIF(item0,'')::INT,
      NULLIF(item1,'')::INT,
      NULLIF(item2,'')::INT,
      NULLIF(item3,'')::INT,
      NULLIF(item4,'')::INT,
      NULLIF(item5,'')::INT,
      NULLIF(item6,'')::INT
    FROM raw_lol
    WHERE NULLIF(game_id,'') IS NOT NULL
      AND NULLIF(participant_id,'') IS NOT NULL
    ON CONFLICT (game_id, participant_id) DO UPDATE
    SET kills = EXCLUDED.kills,
        deaths = EXCLUDED.deaths,
        assists = EXCLUDED.assists,
        baron_kills = EXCLUDED.baron_kills,
        dragon_kills = EXCLUDED.dragon_kills,
        gold_earned = EXCLUDED.gold_earned,
        gold_spent = EXCLUDED.gold_spent,
        total_damage_dealt = EXCLUDED.total_damage_dealt,
        total_damage_dealt_to_champions = EXCLUDED.total_damage_dealt_to_champions,
        physical_damage_dealt_to_champions = EXCLUDED.physical_damage_dealt_to_champions,
        magic_damage_dealt_to_champions = EXCLUDED.magic_damage_dealt_to_champions,
        true_damage_dealt_to_champions = EXCLUDED.true_damage_dealt_to_champions,
        damage_dealt_to_objectives = EXCLUDED.damage_dealt_to_objectives,
        damage_dealt_to_turrets = EXCLUDED.damage_dealt_to_turrets,
        total_damage_taken = EXCLUDED.total_damage_taken,
        physical_damage_taken = EXCLUDED.physical_damage_taken,
        magic_damage_taken = EXCLUDED.magic_damage_taken,
        true_damage_taken = EXCLUDED.true_damage_taken,
        time_ccing_others = EXCLUDED.time_ccing_others,
        vision_score = EXCLUDED.vision_score,
        wards_placed = EXCLUDED.wards_placed,
        wards_killed = EXCLUDED.wards_killed,
        vision_wards_bought_in_game = EXCLUDED.vision_wards_bought_in_game,
        item0 = EXCLUDED.item0,
        item1 = EXCLUDED.item1,
        item2 = EXCLUDED.item2,
        item3 = EXCLUDED.item3,
        item4 = EXCLUDED.item4,
        item5 = EXCLUDED.item5,
        item6 = EXCLUDED.item6;

    /* =======================
       RANKS SOLO (anti-CardinalityViolation)
       - DISTINCT ON(puuid) => 1 ligne max par puuid
       - ORDER BY ... NULLS LAST => choisit une ligne "la plus récente"
       ======================= */
    INSERT INTO ranks (puuid, queue_type, tier, rank, lp, wins, losses)
    SELECT DISTINCT ON (puuid)
      puuid,
      'solo' AS queue_type,
      solo_tier,
      solo_rank,
      NULLIF(solo_lp,'')::INT,
      NULLIF(solo_wins,'')::INT,
      NULLIF(solo_losses,'')::INT
    FROM raw_lol
    WHERE puuid IS NOT NULL AND puuid <> ''
    ORDER BY puuid, NULLIF(game_id,'')::BIGINT DESC NULLS LAST
    ON CONFLICT (puuid, queue_type) DO UPDATE
    SET tier=EXCLUDED.tier,
        rank=EXCLUDED.rank,
        lp=EXCLUDED.lp,
        wins=EXCLUDED.wins,
        losses=EXCLUDED.losses;

    /* =======================
       RANKS FLEX (anti-CardinalityViolation)
       ======================= */
    INSERT INTO ranks (puuid, queue_type, tier, rank, lp, wins, losses)
    SELECT DISTINCT ON (puuid)
      puuid,
      'flex' AS queue_type,
      flex_tier,
      flex_rank,
      NULLIF(flex_lp,'')::INT,
      NULLIF(flex_wins,'')::INT,
      NULLIF(flex_losses,'')::INT
    FROM raw_lol
    WHERE puuid IS NOT NULL AND puuid <> ''
    ORDER BY puuid, NULLIF(game_id,'')::BIGINT DESC NULLS LAST
    ON CONFLICT (puuid, queue_type) DO UPDATE
    SET tier=EXCLUDED.tier,
        rank=EXCLUDED.rank,
        lp=EXCLUDED.lp,
        wins=EXCLUDED.wins,
        losses=EXCLUDED.losses;
    """))
    conn.commit()

print("✅ Cellule 4 exécutée avec succès (version robuste)")
print("ℹ️  Si ton dataset contient des colonnes mastery, on peut remplir champion_mastery aussi.")


✅ Cellule 4 exécutée avec succès (version robuste)
ℹ️  Si ton dataset contient des colonnes mastery, on peut remplir champion_mastery aussi.


In [None]:
from sqlalchemy import text

# Tables whose row counts are printed, staging table first.
tables = [
    "raw_lol",
    "games",
    "summoners",
    "champions",
    "participants",
    "participant_stats",
    "ranks",
]

with engine.connect() as conn:
    # One line per table: name left-aligned on 20 characters, then the row count.
    for table_name in tables:
        count_stmt = text(f"SELECT COUNT(*) FROM {table_name};")
        row_count = conn.execute(count_stmt).scalar()
        print(f"{table_name:<20} {row_count}")

    # Anti-join: participants whose puuid has no matching summoners row.
    summoner_check = text("""
        SELECT COUNT(*)
        FROM participants p
        LEFT JOIN summoners s ON s.puuid = p.puuid
        WHERE s.puuid IS NULL;
    """)
    missing_summoners = conn.execute(summoner_check).scalar()

    # Same check against champions; a NULL champion_id is not counted as missing.
    champion_check = text("""
        SELECT COUNT(*)
        FROM participants p
        LEFT JOIN champions c ON c.champion_id = p.champion_id
        WHERE p.champion_id IS NOT NULL AND c.champion_id IS NULL;
    """)
    missing_champions = conn.execute(champion_check).scalar()

    print("\nFK checks")
    print("participants sans summoner:", missing_summoners)
    print("participants sans champion:", missing_champions)


raw_lol              40410
games                4044
summoners            28098
champions            169
participants         40410
participant_stats    40410
ranks                56196

FK checks
participants sans summoner: 0
participants sans champion: 0


In [None]:
from sqlalchemy import text

# Rebuild the reporting layer from the relational tables: the four dim_* tables
# and the fact table are dropped, recreated with CREATE TABLE ... AS SELECT, and
# the fact table is then indexed on game_id, puuid and champion_id.
mart_sql = text("""
    /* ============================
       DIMENSIONS + FACT TABLE
       ============================ */

    DROP TABLE IF EXISTS fact_participant_performance CASCADE;
    DROP TABLE IF EXISTS dim_game CASCADE;
    DROP TABLE IF EXISTS dim_player CASCADE;
    DROP TABLE IF EXISTS dim_champion CASCADE;
    DROP TABLE IF EXISTS dim_rank CASCADE;

    CREATE TABLE dim_game AS
    SELECT
      g.game_id,
      g.game_start_utc,
      g.game_duration,
      g.game_mode,
      g.game_type,
      g.game_version,
      g.map_id,
      g.platform_id,
      g.queue_id
    FROM games g;

    CREATE TABLE dim_player AS
    SELECT
      s.puuid,
      s.summoner_id,
      s.summoner_name,
      s.summoner_level
    FROM summoners s;

    CREATE TABLE dim_champion AS
    SELECT
      c.champion_id,
      c.champion_name
    FROM champions c;

    CREATE TABLE dim_rank AS
    SELECT
      puuid,
      MAX(CASE WHEN queue_type='solo' THEN tier END) AS solo_tier,
      MAX(CASE WHEN queue_type='solo' THEN rank END) AS solo_rank,
      MAX(CASE WHEN queue_type='solo' THEN lp END)   AS solo_lp,
      MAX(CASE WHEN queue_type='flex' THEN tier END) AS flex_tier,
      MAX(CASE WHEN queue_type='flex' THEN rank END) AS flex_rank,
      MAX(CASE WHEN queue_type='flex' THEN lp END)   AS flex_lp
    FROM ranks
    GROUP BY puuid;

    CREATE TABLE fact_participant_performance AS
    SELECT
      p.game_id,
      p.puuid,
      p.champion_id,
      p.team_id,
      p.win,
      p.individual_position,
      p.team_position,
      p.lane,
      p.role,

      st.kills,
      st.deaths,
      st.assists,
      (st.kills + st.assists) AS kp_raw,  -- indicateur simple
      st.gold_earned,
      st.gold_spent,
      st.vision_score,
      st.wards_placed,
      st.wards_killed,
      st.total_damage_dealt_to_champions,
      st.total_damage_taken
    FROM participants p
    LEFT JOIN participant_stats st
      ON st.game_id = p.game_id AND st.participant_id = p.participant_id;

    /* Index pour perfs */
    CREATE INDEX IF NOT EXISTS idx_fact_game ON fact_participant_performance(game_id);
    CREATE INDEX IF NOT EXISTS idx_fact_player ON fact_participant_performance(puuid);
    CREATE INDEX IF NOT EXISTS idx_fact_champion ON fact_participant_performance(champion_id);
    """)

# engine.begin() commits when the block exits normally and rolls back on error.
with engine.begin() as mart_conn:
    mart_conn.execute(mart_sql)

print("✅ Data mart créé : dim_game, dim_player, dim_champion, dim_rank, fact_participant_performance")


✅ Data mart créé : dim_game, dim_player, dim_champion, dim_rank, fact_participant_performance


In [None]:
from sqlalchemy import text
import pandas as pd

# Reporting queries run against the data mart, keyed by the title printed above
# each result.
#
# Winrate: AVG(win::int) skips rows whose outcome is NULL instead of counting
# them as losses; NULLS LAST keeps a champion with no known outcome out of the
# top of the ranking. `games` still counts every row.
#
# KDA: the divisor is GREATEST(deaths, 1), so a game with zero deaths contributes
# kills + assists. Dividing by NULLIF(deaths, 0) produced NULL for those games
# and AVG then dropped them from the average. Rows are grouped by puuid as well
# as by name so that different players sharing a name (or having none) are not
# merged into one line.
queries = {
    "Top 10 champions (winrate)": """
SELECT
  c.champion_name,
  COUNT(*) AS games,
  ROUND(AVG(f.win::int), 4) AS winrate
FROM fact_participant_performance f
JOIN dim_champion c ON c.champion_id = f.champion_id
WHERE f.champion_id IS NOT NULL
GROUP BY c.champion_name
HAVING COUNT(*) >= 50
ORDER BY winrate DESC NULLS LAST, games DESC
LIMIT 10;
""",
    "Top 10 joueurs (KDA)": """
SELECT
  p.summoner_name,
  COUNT(*) AS games,
  ROUND(AVG( (COALESCE(f.kills,0)+COALESCE(f.assists,0)) / GREATEST(COALESCE(f.deaths,0),1)::numeric ) , 3) AS avg_kda
FROM fact_participant_performance f
JOIN dim_player p ON p.puuid = f.puuid
GROUP BY p.puuid, p.summoner_name
HAVING COUNT(*) >= 20
ORDER BY avg_kda DESC, games DESC
LIMIT 10;
""",
}

# Run each query on one shared connection and render the result as a DataFrame.
with engine.connect() as conn:
    for title, q in queries.items():
        df = pd.read_sql(text(q), conn)
        print("\n---", title, "---")
        display(df)



--- Top 10 champions (winrate) ---


Unnamed: 0,champion_name,games,winrate
0,Caitlyn,751,0.0
1,MissFortune,713,0.0
2,Jinx,678,0.0
3,Lux,663,0.0
4,Jhin,633,0.0
5,Lulu,540,0.0
6,Ezreal,520,0.0
7,Viego,520,0.0
8,Viktor,478,0.0
9,MonkeyKing,454,0.0



--- Top 10 joueurs (KDA) ---


Unnamed: 0,summoner_name,games,avg_kda
0,DJ KHALEDS SON,21,7.006
1,Kudemon,20,6.823
2,Trendix,20,5.996
3,Totthy,20,5.961
4,G2 Matel,20,5.838
5,Spanie,20,5.808
6,Tomsteri,20,5.614
7,ISweαrSheWas18,20,5.601
8,Hanover12,20,5.493
9,splachl jsem to,20,5.493
