16. Which player has the highest winning percentage against top-10-ranked opponents?


In [None]:
import pandas as pd
from pathlib import Path

def win_rates_vs_top_ranked(matches, players, top_n=10, min_matches=1):
    """Return every player's record against opponents ranked in the top ``top_n``.

    Each decided match is looked at from both sides, so a player's losses to
    top-ranked opponents count against them. Counting only wins (as an earlier
    version of this cell did) answers "who has the most wins", not "who has
    the highest winning percentage".

    Args:
        matches: DataFrame with ``home_player_id``, ``away_player_id`` and
            ``winner_code`` columns. ``winner_code`` 1 means the home player
            won and 2 means the away player won; rows with any other value
            are ignored instead of being counted as away wins.
        players: DataFrame with at least ``player_id`` and ``current_rank``;
            any other columns (e.g. ``full_name``, ``slug``) are carried into
            the result.
        top_n: An opponent counts as top-ranked when ``current_rank <= top_n``.
        min_matches: Minimum number of matches against top-ranked opponents a
            player needs to appear in the result.

    Returns:
        DataFrame with one row per player, containing ``player_id``,
        ``matches_vs_top10``, ``wins_vs_top10``, ``win_pct_vs_top10`` (0-100)
        and the player columns, sorted best first. Ties on percentage are
        broken by more wins, then by more matches.
    """
    # NOTE(review): current_rank is presumably the ranking at export time, not
    # the opponent's ranking when the match was played - confirm this is the
    # intended definition of "top 10 opponent".

    # One row per player so the rank lookup and the final join cannot fan out.
    roster = players.dropna(subset=["player_id"]).drop_duplicates("player_id")
    rank_by_id = roster.set_index("player_id")["current_rank"]

    # Keep only matches with an actual winner.
    decided = matches[matches["winner_code"].isin([1, 2])]
    home_won = decided["winner_code"] == 1

    # Long format: every match contributes one row per participant.
    home_side = pd.DataFrame({
        "player_id": decided["home_player_id"],
        "opponent_id": decided["away_player_id"],
        "won": home_won,
    })
    away_side = pd.DataFrame({
        "player_id": decided["away_player_id"],
        "opponent_id": decided["home_player_id"],
        "won": ~home_won,
    })
    appearances = pd.concat([home_side, away_side], ignore_index=True)

    # Attach the opponent's rank; opponents without a rank become NaN and
    # fail the comparison below, so they are excluded.
    appearances["opp_rank"] = appearances["opponent_id"].map(rank_by_id)
    vs_top = appearances[appearances["opp_rank"] <= top_n]

    record = (
        vs_top.groupby("player_id")["won"]
        .agg(matches_vs_top10="size", wins_vs_top10="sum")
        .reset_index()
    )
    record["wins_vs_top10"] = record["wins_vs_top10"].astype(int)
    record = record[record["matches_vs_top10"] >= min_matches].copy()
    record["win_pct_vs_top10"] = (
        100.0 * record["wins_vs_top10"] / record["matches_vs_top10"]
    )

    # Attach player details (name, slug, ...).
    table = record.merge(roster, on="player_id", how="left")
    return table.sort_values(
        ["win_pct_vs_top10", "wins_vs_top10", "matches_vs_top10"],
        ascending=False,
    ).reset_index(drop=True)


# Still true inside a notebook kernel, so the cell runs as before; the guard
# only keeps the file I/O from running if this code is imported.
if __name__ == "__main__":
    project_root = Path.cwd().parent
    processed_dir = project_root / "data" / "processed"
    raw_dir = project_root / "data" / "raw"
    answers_dir = project_root / "reports" / "answers"
    answers_dir.mkdir(parents=True, exist_ok=True)

    matches_file = processed_dir / "match_results_with_winner.parquet"
    players_file = raw_dir / "players.parquet"

    matches = pd.read_parquet(matches_file)
    players = pd.read_parquet(players_file)[
        ["player_id", "full_name", "slug", "current_rank"]
    ]

    # With min_matches=1 a single win gives 100%; raise it to require a
    # larger sample before a player can top the table.
    result = win_rates_vs_top_ranked(matches, players, top_n=10, min_matches=1)

    report_columns = [
        "full_name",
        "slug",
        "matches_vs_top10",
        "wins_vs_top10",
        "win_pct_vs_top10",
    ]
    top_player = result.head(1)
    print(top_player[report_columns])

    output_file = answers_dir / "q16_top_player_vs_top10.csv"
    top_player[report_columns].to_csv(
        output_file, index=False, encoding="utf-8-sig"
    )
    print(f"Saved to {output_file}")


       full_name         slug  wins_vs_top10
30  Humbert, Ugo  humbert-ugo              4
Saved to c:\Users\mit\Desktop\Finaaal\reports\answers\q16_top_player_vs_top10.csv
