In [1]:
import pandas as pd
import pandas as pd, numpy as np, re, joblib
from pathlib import Path
from datetime import datetime, timedelta
import joblib


# Load one export per VCT region and stamp every row with its region code.
emea_df = pd.read_csv("emea.csv").assign(Region="EMEA")
amer_df = pd.read_csv("amer.csv").assign(Region="AMER")
cn_df = pd.read_csv("cn.csv").assign(Region="CN")
apac_df = pd.read_csv("apac.csv").assign(Region="APAC")

# Stack the four regions into a single frame with a fresh 0..n-1 index.
all_df = pd.concat([emea_df, amer_df, cn_df, apac_df], ignore_index=True)

# MatchID becomes "<id>_<REGION>" (as text) -- presumably so that IDs reused
# by different regional files stay distinct after the concat.
all_df["MatchID"] = all_df["MatchID"].astype(str) + "_" + all_df["Region"]

# Player and agent names are stored lower-cased in the combined dataset.
for text_col in ("Player", "Agent"):
    all_df[text_col] = all_df[text_col].str.lower()

all_df.to_csv("all_regions.csv", index=False)

In [2]:
# Full team name (upper-case) -> short tag, grouped by VCT league.
_VCT_AMERICAS_TEAMS = {
    "100 THIEVES": "100T",
    "CLOUD9": "C9",
    "EVIL GENIUSES": "EG",
    "FURIA": "FURIA",
    "G2 ESPORTS": "G2",
    "KRÜ ESPORTS": "KRÜ",
    "LEVIATÁN": "LEV",
    "LOUD": "LOUD",
    "MIBR": "MIBR",
    "NRG": "NRG",
    "SENTINELS": "SEN",
    "2GAME ESPORTS": "2G",
}

_VCT_EMEA_TEAMS = {
    "BBL ESPORTS": "BBL",
    "FNATIC": "FNC",
    "FUT ESPORTS": "FUT",
    "GIANTX": "GX",
    "TEAM HERETICS": "HER",
    "KARMINE CORP": "KC",
    "KOI": "KOI",
    "NATUS VINCERE": "NAVI",
    "TEAM LIQUID": "TL",
    "TEAM VITALITY": "VIT",
    "GENTLE MATES": "GM",
    "APEKS": "APK",
}

_VCT_PACIFIC_TEAMS = {
    "BLEED ESPORTS": "BLEED",
    "DETONATION FOCUSME": "DFM",
    "DRX": "DRX",
    "GLOBAL ESPORTS": "GE",
    "GEN.G ESPORTS": "GEN.G",
    "PAPER REX": "PRX",
    "REX REGUM QEON": "RRQ",
    "T1": "T1",
    "TALON ESPORTS": "TALON",
    "TEAM SECRET": "TS",
    "ZETA DIVISION": "ZETA",
    "BOOM ESPORTS": "BME",
}

_VCT_CHINA_TEAMS = {
    "ALL GAMERS": "AG",
    "BILIBILI GAMING": "BLG",
    "EDWARD GAMING": "EDG",
    "FUNPLUS PHOENIX": "FPX",
    "JD GAMING": "JDG",
    "NOVA ESPORTS": "NOVA",
    "TITAN ESPORTS CLUB": "TEC",
    "TRACE ESPORTS": "TRACE",
    "TYLOO": "TYLOO",
    "WOLVES ESPORTS": "WOLVES",
    "DRAGON RANGER GAMING": "DRG",
    "XI LAI GAMING": "XLG",
}

# Single lookup table used by the rest of the notebook. The league tables share
# no keys, so merging them keeps every entry, in Americas/EMEA/Pacific/China order.
vct_teams_fullname_to_abbr = {
    **_VCT_AMERICAS_TEAMS,
    **_VCT_EMEA_TEAMS,
    **_VCT_PACIFIC_TEAMS,
    **_VCT_CHINA_TEAMS,
}
def get_team_abbreviation(team_name: str) -> str:
    """Look up a team's short tag, ignoring the case of the input.

    The name is upper-cased and looked up in ``vct_teams_fullname_to_abbr``.
    A name that is not in the table is returned upper-cased, otherwise unchanged.
    """
    lookup_key = team_name.upper()
    return vct_teams_fullname_to_abbr.get(lookup_key, lookup_key)


In [3]:
MODEL_PATH = "kills_model.pkl"
COLS_PATH  = "kills_model_cols.pkl"

def predict_kills(player: str,
                  map_name: str,
                  opponent_team: str,
                  rounds_played: int | float,
                  model_path: str = MODEL_PATH,
                  cols_path: str = COLS_PATH) -> float:
    """Return estimated kills (float)."""
    # 1. load model + training column order
    model = joblib.load(model_path)
    cols  = pd.read_pickle(cols_path)   # pandas Index

    # 2. build 1-row DataFrame with the four inputs
    row = pd.DataFrame({
        "Player":       [player.lower()],
        "Map":          [map_name],
        "OpponentTeam": [opponent_team],
        "RoundsPlayed": [float(rounds_played)]
    })

    # 3. one-hot encode & align columns
    row_enc = pd.get_dummies(row, drop_first=False)
    row_enc = row_enc.reindex(columns=cols, fill_value=0)

    # 4. predict
    return model.predict(row_enc)[0]

# Example usage (needs kills_model.pkl and kills_model_cols.pkl in the working directory):
#     est = predict_kills("paTiTek", "Split", "FNC", 23)
#     print(f"Predicted kills: {est:.1f}")


In [4]:
# Header pattern (full match, case-insensitive) -> canonical column name.
_SCORE_CSV_COLUMN_ALIASES = {
    "Match": "MatchID",
    r"map(|name)|map_title":               "Map",
    r"team$|my[_\s]?team|squad":           "Team",
    r"player(|name)|gamer(tag)?|nick":     "Player",
}


def _canonicalise_score_columns(df):
    """Rename recognised header variants to MatchID / Map / Team / Player."""
    for pattern, canonical in _SCORE_CSV_COLUMN_ALIASES.items():
        if canonical in df.columns:
            # Already canonical: renaming an alias on top would duplicate the column.
            continue
        hits = [col for col in df.columns
                if isinstance(col, str) and re.fullmatch(pattern, col, flags=re.I)]
        if len(hits) > 1:
            raise ValueError(
                f"Ambiguous columns for {canonical!r}: {hits}; keep only one of them")
        if hits:
            df = df.rename(columns={hits[0]: canonical})
    return df


def _rounds_from_map_score(score):
    """Sum the first two integers in a score such as '13-11'; NaN if fewer than two."""
    numbers = re.findall(r"\d+", str(score))
    if len(numbers) < 2:
        return np.nan
    return int(numbers[0]) + int(numbers[1])


def _swap_teams(teams):
    """Within one MatchID/Map group, replace each team label with the other team's."""
    pair = teams.unique()
    if len(pair) != 2:
        raise ValueError("Each MatchID–Map must have exactly 2 teams")
    return teams.replace({pair[0]: pair[1], pair[1]: pair[0]})


def score_kills_csv(csv_path,
                    model_path="kills_model.pkl",
                    cols_path="kills_model_cols.pkl",
                    out_path=None):
    """Score every row of a per-player, per-map CSV with the kills model.

    Args:
        csv_path: Input CSV. It must provide (possibly under an alias accepted by
            ``_SCORE_CSV_COLUMN_ALIASES``) MatchID, Map, Team and Player, plus
            either RoundsPlayed or a MapScore column such as "13-11".
        model_path: Pickled estimator exposing ``predict``.
        cols_path: Pickle holding the training column order.
        out_path: Optional CSV path for the scored frame; parent folders are created.

    Returns:
        The input frame with canonical column names and added ``RoundsPlayed``
        (if it was derived), ``OpponentTeam`` and ``PredKills`` columns.

    Raises:
        ValueError: If a required column is missing, several headers map to the
            same canonical column, neither RoundsPlayed nor MapScore exists, or
            a MatchID/Map group does not contain exactly two teams.
    """
    df = pd.read_csv(csv_path)

    # ---- 0) canonicalise the four must-have columns ------------------
    df = _canonicalise_score_columns(df)

    required = {"MatchID", "Map", "Team", "Player"}
    if not required.issubset(df.columns):
        missing = required - set(df.columns)
        raise ValueError(f"CSV still missing columns: {missing}")

    # ---- 1) RoundsPlayed (parse if absent) ---------------------------
    if "RoundsPlayed" not in df.columns:
        if "MapScore" not in df.columns:
            raise ValueError("Need RoundsPlayed or MapScore column.")
        # A score needs two numbers; a lone "13" is treated as missing rather
        # than as a 13-round map.
        df["RoundsPlayed"] = df["MapScore"].apply(_rounds_from_map_score)
    df["RoundsPlayed"] = pd.to_numeric(df["RoundsPlayed"], errors="coerce")

    # ---- 2) OpponentTeam by swapping ---------------------------------
    df["OpponentTeam"] = (
        df.groupby(["MatchID", "Map"])["Team"].transform(_swap_teams)
    )

    # ---- 3) build feature matrix & score -----------------------------
    features = df[["Player", "Map", "OpponentTeam", "RoundsPlayed"]].copy()
    # predict_kills lower-cases the player before encoding; do the same here so a
    # mixed-case name does not silently encode as "unknown player" (all zeros).
    # The Player column of the returned frame keeps its original casing.
    features["Player"] = features["Player"].map(
        lambda name: name.lower() if isinstance(name, str) else name)
    X = pd.get_dummies(features, drop_first=False)
    # SECURITY: cols_path and model_path are pickles; only load trusted files.
    X = X.reindex(columns=pd.read_pickle(cols_path), fill_value=0)

    df["PredKills"] = joblib.load(model_path).predict(X)

    if out_path:
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(out_path, index=False)

    return df


In [5]:
%run ../aryan/elo_implementation.ipynb

FNATIC Ascent elo on 2025-02-15: 100

TEAM LIQUID map ratings (as of 2025-02-15):
{}

Final Elo table:
Team      Map    Elo
100T   Ascent 106.75
100T Fracture 110.62
100T    Haven  61.20
100T   Icebox  99.68
100T    Pearl  79.80
100T    Split 103.44
  2G   Ascent 110.85
  2G    Haven  83.79
  2G   Icebox  78.48
  2G    Lotus  93.78
  2G    Pearl  96.48
  AG Fracture 100.60
  AG    Lotus  87.63
  AG    Pearl  55.16
  AG    Split  95.40
 APK   Ascent  84.24
 APK    Haven  95.40
 APK    Lotus  68.41
 APK    Split  85.97
 BBL   Ascent 104.60
 BBL Fracture  92.59
 BBL    Haven 133.02
 BBL   Icebox  95.15
 BBL    Lotus 111.19
 BBL    Pearl 110.63
 BLG   Ascent 120.20
 BLG Fracture 101.94
 BLG    Haven  75.53
 BLG   Icebox 107.35
 BLG    Lotus 125.47
 BLG    Pearl 123.26
 BLG    Split  87.60
 BME   Ascent  84.52
 BME Fracture  85.01
 BME    Haven 104.15
 BME   Icebox 109.87
 BME    Lotus 105.99
 BME    Pearl 107.20
 BME    Split  85.57
  C9   Ascent 109.49
  C9 Fracture 103.96
  C9    Haven 1

In [6]:
# --- Input data ---
# Betting lines export and match-level stats, one CSV each.
BETTING_LINES_CSV = '../leo/valorant_projections_2025-05-24_18-20-05.csv'
MATCH_STATS_CSV = '../anthony/all_regions.csv'

betting_df = pd.read_csv(BETTING_LINES_CSV)
stats_df = pd.read_csv(MATCH_STATS_CSV)

def data_frame_preprocessing(betting_df, stats_df):
    """
    Attach each player's most recent team and the parsed opponent to the betting lines.

    Both inputs are modified in place, so callers that ignore the return value
    still see the changes:
      * betting_df: 'player_name' is lower-cased; 'team' and 'opponent_team' are added.
      * stats_df:   'Player_lower' is added and 'Date' is converted to datetime.

    No kill predictions and no match-date column are produced here;
    get_kill_prediction computes the match date itself when it is applied to a row.

    Args:
        betting_df (pd.DataFrame): Betting lines with 'player_name' and 'opponent' columns.
        stats_df (pd.DataFrame): Match statistics with 'Player', 'Date' and 'Team' columns.

    Returns:
        pd.DataFrame: The same betting_df object. 'team' is NaN for players absent
        from stats_df, and 'opponent_team' is NaN when 'opponent' does not contain
        a name followed by whitespace and "MAP".
    """

    # --- Step 1: Normalize player names ---
    betting_df['player_name'] = betting_df['player_name'].str.lower()

    # --- Step 2: Determine the most recent team for each player ---
    # Lower-case key so the lookup matches the normalized betting names
    stats_df['Player_lower'] = stats_df['Player'].str.lower()
    stats_df['Date'] = pd.to_datetime(stats_df['Date'])

    # Oldest first, so keep='last' picks the latest dated row per player.
    # Rows without a date are sorted to the front: with the default ordering
    # they land at the end and would be mistaken for the most recent match.
    # The stable sort keeps file order among rows sharing a date.
    most_recent_team = (
        stats_df
        .sort_values('Date', na_position='first', kind='stable')
        .drop_duplicates('Player_lower', keep='last')
    )
    player_team_map = dict(zip(most_recent_team['Player_lower'], most_recent_team['Team']))

    # Add team info to betting_df
    betting_df['team'] = betting_df['player_name'].map(player_team_map)

    # --- Step 3: Extract opponent team ---
    # Everything before the first whitespace + "MAP" (case-sensitive) is the opponent name
    betting_df['opponent_team'] = betting_df['opponent'].str.extract(r'^(.*?)\s+MAP', expand=False)

    return betting_df


# --- Step 5: Predict kills ---
# NOTE(review): simulate_map_selection, ALL_MAPS, elo_hist and
# predict_map_length_by_date are not defined in the cells shown here; presumably
# they come from the `%run ../aryan/elo_implementation.ipynb` cell -- confirm.
def _pick_decider_map(map_result, team, match_date):
    """Return the map left after picks/bans on which `team` has the highest Elo, or None."""
    used_maps = {
        map_result["BanA"], map_result["BanB"], map_result["PickA"], map_result["PickB"]
    }
    remaining_pool = [m for m in ALL_MAPS if m not in used_maps]
    if not remaining_pool:
        return None
    as_of = pd.to_datetime(match_date)
    try:
        return max(remaining_pool, key=lambda m: elo_hist.rating(team, m, as_of))
    except Exception as e:
        # Best effort: report the failure and carry on without a decider map.
        print(f"Decider map selection failed for {team}: {e}")
        return None


def get_kill_prediction(row):
    """Predict a player's total kills for one betting line.

    Args:
        row: Mapping/Series with 'player_name', 'team', 'opponent_team' and 'stat_type'.

    Returns:
        The sum of predict_kills over the simulated maps, or None when the team
        or opponent is missing, the map simulation fails, or no map could be scored.

    Maps scored per stat_type:
        * "maps_1-3_kills" / "map_1_2_3_kills": both picks plus the decider (if one is found).
        * "map_3_kills": the decider only.
        * anything else: the two picks.
          NOTE(review): other single-map stat types would also fall into this
          default -- confirm that is intended.

    A map whose prediction fails is reported and left out of the total, so the
    result can cover fewer maps than the stat type names.
    """
    # Title-casing only affects the log messages below; predict_kills lower-cases
    # the name again before encoding.
    player = row['player_name'].title()
    team = row['team']
    team = vct_teams_fullname_to_abbr.get(str(team).upper(), team)
    opponent = row['opponent_team']
    opponent = vct_teams_fullname_to_abbr.get(str(opponent).upper(), opponent)
    # All betting lines are assumed to be for tomorrow
    match_date = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')
    stat_type = row['stat_type']

    if pd.isna(team) or pd.isna(opponent):
        return None

    try:
        map_result = simulate_map_selection(team, opponent, match_date)
    except Exception as e:
        print(f"Map sim failed for {team} vs {opponent}: {e}")
        return None

    # DEFAULT maps_1-2_kills
    maps_to_play = [map_result["PickA"], map_result["PickB"]]

    if stat_type in ("maps_1-3_kills", "map_1_2_3_kills"):
        decider_map = _pick_decider_map(map_result, team, match_date)
        if decider_map is not None:
            maps_to_play.append(decider_map)
    elif stat_type == "map_3_kills":
        # Only the decider (3rd map) counts for this line
        decider_map = _pick_decider_map(map_result, team, match_date)
        maps_to_play = [decider_map] if decider_map is not None else []

    total_kills = 0
    maps_scored = 0
    for map_name in maps_to_play:
        try:
            rounds_played = predict_map_length_by_date(team, opponent, map_name, match_date)
            total_kills += predict_kills(player, map_name, opponent, rounds_played)
            maps_scored += 1
        except Exception as e:
            print(f"Kill prediction failed for {player} on {map_name}: {e}")

    # A total of 0 from "nothing could be scored" would look like a real
    # projection; report it as missing, like the other failure paths.
    if maps_scored == 0:
        return None
    return total_kills

In [43]:
# Smoke test: score one hand-built betting line end to end.
test = pd.DataFrame({
    "player_name": ["johnqt"],
    "team": ["Sentinels"],
    "opponent_team": ["FNATIC"],
    "stat_type": ["maps_1-2_kills"],
})
test["predicted_kills"] = test.apply(get_kill_prediction, axis=1)
test

Unnamed: 0,player_name,team,opponent_team,stat_type,predicted_kills
0,johnqt,Sentinels,FNATIC,maps_1-2_kills,27.528179
