In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Locate the project folders relative to the parent of the current working
# directory (the script is expected to run from a sub-folder of the project).
project_root = Path.cwd().parent
data_dir = project_root / "data"
reports_dir = project_root / "reports"

# Input locations.
raw_dir = data_dir / "raw"
proc_dir = data_dir / "processed"

# Output locations; created up front so the later CSV/PNG writes cannot fail
# on a missing folder.
answers_dir = reports_dir / "answers"
figures_dir = reports_dir / "figures"
for out_dir in (answers_dir, figures_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Load the per-match statistics and the processed match table.
stats_path = raw_dir / "statistics_all.parquet"
matches_path = proc_dir / "match_results_player_filled.parquet"
stats = pd.read_parquet(stats_path)
matches = pd.read_parquet(matches_path)

# Keep only the rows that describe double faults.
df = stats.loc[stats["statistic_name"] == "double_faults"]

# Build one frame per side with the common columns
# (match_id, double_faults, gender): the side's value column is paired with
# the side's gender column looked up in `matches` by match_id.
# NOTE(review): a left merge repeats a statistics row once per matching row in
# `matches`; this assumes match_id is unique there -- confirm.
side_columns = (
    ("home_value", "gender_home"),
    ("away_value", "gender_away"),
)
home_df, away_df = (
    pd.merge(
        df[["match_id", value_col]],
        matches[["match_id", gender_col]],
        how="left",
        on="match_id",
    ).rename(columns={value_col: "double_faults", gender_col: "gender"})
    for value_col, gender_col in side_columns
)

# Combine the data: stack home and away rows into a single long table.
all_faults = pd.concat([home_df, away_df], ignore_index=True)

# Mean number of double faults per gender (one row per gender value).
faults_by_gender = all_faults.groupby("gender", as_index=False)["double_faults"]
mean_df = faults_by_gender.mean()

# Express each gender's mean as a share of the sum of the per-gender means,
# so the percentages add up to 100.
total_of_means = mean_df["double_faults"].sum()
mean_df["percentage"] = mean_df["double_faults"].div(total_of_means).mul(100)

# Persist the table; the utf-8-sig encoding writes a BOM at the start of the file.
csv_path = answers_dir / "q8_double_faults_percentage_by_gender_mean.csv"
mean_df.to_csv(csv_path, encoding="utf-8-sig", index=False)

print("\nMean Double Faults & Percentage by Gender")
print(mean_df)
print(f"\nCSV saved to: {csv_path}")

# Pie chart of each gender's share of the summed per-gender means.
# Colours are keyed by gender label instead of by position, so a slice keeps
# its colour even if a category is missing or the row order of mean_df
# changes; labels outside the map fall back to a neutral grey.
gender_colors = {"F": "#CF6097", "M": "#3D8CDB"}
colors = [gender_colors.get(label, "#9E9E9E") for label in mean_df["gender"]]

fig, ax = plt.subplots(figsize=(5, 5))
ax.pie(
    mean_df["percentage"],
    labels=mean_df["gender"],
    autopct="%1.1f%%",
    startangle=90,
    colors=colors,
    textprops={'fontsize': 12}
)
ax.set_title("Mean Double Faults Percentage by Gender", fontsize=14)

fig_path = figures_dir / "q8_double_faults_percentage_pie.png"
fig.savefig(fig_path, dpi=300)
# Close this specific figure (not just "the current one") to free its memory.
plt.close(fig)

print(f"Pie chart saved to: {fig_path}")



Mean Double Faults & Percentage by Gender
  gender  double_faults  percentage
0      F       2.073707   56.685673
1      M       1.584549   43.314327

CSV saved to: c:\Users\mit\Desktop\Final_Project\reports\answers\q8_double_faults_percentage_by_gender_mean.csv
Pie chart saved to: c:\Users\mit\Desktop\Final_Project\reports\figures\q8_double_faults_percentage_pie.png
