In [25]:
from ydata_profiling import ProfileReport
import pandas as pd

In [27]:
games = pd.read_csv('Data/Games.csv')
teams = pd.read_csv("Data/Teams.csv")[['CompetitionID','TeamID','Name']]
competitions = pd.read_csv("Data/Competition.csv")[['CompetitionID','CompetitionName','Place']]

In [28]:
games_merged = games.merge(competitions, on="CompetitionID", how="left")
games_merged = games_merged.merge(teams, left_on=["CompetitionID","TeamID1"], right_on=["CompetitionID","TeamID"], how="left").rename(columns={'Name':'Team1'})
games_merged = games_merged.merge(teams, left_on=["CompetitionID","TeamID2"], right_on=["CompetitionID","TeamID"], how="left").rename(columns={'Name':'Team2'})

In [19]:
report = ProfileReport(games_merged)

In [20]:
report

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


100%|███████████████████████████████████████| 19/19 [00:00<00:00, 284613.49it/s][A


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [29]:
# Win Rate by Country -------------------------------------------------
# Winner=1 → Team1 win, Winner=0 → Team2 win
team1_wins = games_merged.groupby('Team1')['Winner'].sum()
team2_wins = (1 - games_merged['Winner']).groupby(games_merged['Team2']).sum()
wins = team1_wins.add(team2_wins, fill_value=0)

games_played = (
    games_merged['Team1'].value_counts()
    + games_merged['Team2'].value_counts()
)
win_rate = (wins / games_played).sort_values(ascending=False).rename('WinRate')
team_stats = pd.DataFrame({'Games': games_played, 'Wins': wins, 'WinRate': win_rate})
print("\nWin Rate by Country\n", team_stats.head())


Win Rate by Country
                 Games  Wins   WinRate
Australia          39    20  0.512821
Austria            10     2  0.200000
Canada             41    28  0.682927
China              19     5  0.263158
Czech Republic     19     8  0.421053


In [32]:
# Win Rate for the Team with the Hammer (LSFE) -------------------------------------
# Check how often the team that had the last stone in the first end (the hammer) won the game.
hammer_team_win = (
    ((games_merged['LSFE'] == 1) & (games_merged['Winner'] == 1))
    | ((games_merged['LSFE'] == 0) & (games_merged['Winner'] == 0))
).mean()
print(f"\nWin Rate for the Team with the Hammer (LSFE): {hammer_team_win:.2%}")


Win Rate for the Team with the Hammer (LSFE): 57.27%


In [33]:
# Average Score Difference per Game -----------------------------------------
games_merged['ScoreDiff'] = abs(
    games_merged['ResultStr1'] - games_merged['ResultStr2']
)
avg_diff = games_merged['ScoreDiff'].mean()
print(f"\nAverage Score Difference per Game: {avg_diff:.2f}")


Average Score Difference per Game: 3.55


In [34]:
# Group-Stage Performance Comparison -------------------------------
group_summary = (
    games_merged.groupby('GroupID')
    .agg(
        Games=('GameID', 'count'),
        AvgScoreTeam1=('ResultStr1', 'mean'),
        AvgScoreTeam2=('ResultStr2', 'mean'),
        WinRateTeam1=('Winner', 'mean')
    )
    .reset_index()
)
print("\nGroup-Stage Performance Comparison\n", group_summary)


Group-Stage Performance Comparison
    GroupID  Games  AvgScoreTeam1  AvgScoreTeam2  WinRateTeam1
0        0     74       6.189189       6.108108      0.540541
1        1    135       6.340741       6.251852      0.488889
2        2    135       6.318519       5.948148      0.562963
