In [143]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from collections import OrderedDict
import csv


In [144]:
elo_ratings = {}
k_factor = 32

# Player lookup: column 0 of atp_players.csv -> the full CSV row (list of strings).
# The context manager closes the file as soon as every row has been consumed;
# newline="" is what the csv module expects for files handed to csv.reader.
with open("atp_data/atp_players.csv", newline="") as pf:
    # `if row` skips completely blank lines, which csv.reader yields as [].
    players = OrderedDict((row[0], row) for row in csv.reader(pf) if row)

In [147]:
import os
import pandas as pd

# One rating table per surface key; update_elo fills each table in place.
surface_elo = {
    surface_name: {}
    for surface_name in ("hard", "clay", "grass", "unknown", "overall")
}

# Rating used by update_elo for a player who is not yet in a table.
default_elo = 1500
# Multiplier applied to (actual - expected) score in update_elo.
k_factor = 32

def expected_score(rating_a, rating_b):
    """Return the expected score of A against B on a base-10 logistic curve.

    Equal ratings give 0.5; the result approaches 1 as ``rating_a`` exceeds
    ``rating_b`` and approaches 0 in the opposite case.
    """
    rating_gap = (rating_b - rating_a) / 400
    odds_against = 10 ** rating_gap
    return 1 / (1 + odds_against)

def update_elo(elo_dict, winner, loser):
    """Apply a single match result to ``elo_dict`` in place.

    Players missing from ``elo_dict`` start from the module-level
    ``default_elo``. The winner gains ``k_factor`` times the probability
    that they would have lost; the loser gives up the same amount.
    """
    rating_w = elo_dict.get(winner, default_elo)
    rating_l = elo_dict.get(loser, default_elo)

    # Points changing hands: large for an upset, small for an expected win.
    swing = k_factor * (1 - expected_score(rating_w, rating_l))

    elo_dict[winner] = rating_w + swing
    elo_dict[loser] = rating_l - swing

def load_data(years, data_dir):
    """Read ``atp_matches_<year>.csv`` for each year and stack them into one frame.

    Parameters
    ----------
    years : iterable
        Values interpolated into the file name ``atp_matches_{year}.csv``.
    data_dir : str
        Directory that holds the yearly CSV files.

    Returns
    -------
    pandas.DataFrame
        All files that were found, concatenated in the order of ``years``
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If none of the requested files exists. (Previously this surfaced as
        pandas' generic "No objects to concatenate" ValueError.)
    """
    years = list(years)
    frames = []
    for year in years:
        file_path = os.path.join(data_dir, f"atp_matches_{year}.csv")
        if not os.path.exists(file_path):
            # A missing year is reported but does not abort the load.
            print(f"File not found: {file_path}")
            continue
        frames.append(pd.read_csv(file_path))

    if not frames:
        raise ValueError(
            f"No atp_matches_<year>.csv files found in {data_dir} for years {years}"
        )
    return pd.concat(frames, ignore_index=True)

# ---- Load match results ------------------------------------------------------
years = [2020, 2021, 2022, 2023, 2024]
data_dir = "atp_data"
df = load_data(years, data_dir)
# Matches with no recorded surface are pooled under the "unknown" table.
df['surface'] = df['surface'].fillna('unknown')

# ---- Replay every match ------------------------------------------------------
# NOTE(review): matches are replayed in the row order of the concatenated files.
# Elo is order-dependent, so confirm the CSVs are already chronological.
match_columns = zip(df['surface'].str.lower(), df['winner_id'], df['loser_id'])
for surface, winner_id, loser_id in match_columns:
    # setdefault: a surface that was not pre-declared in surface_elo gets its
    # own table instead of aborting the whole run with a KeyError.
    update_elo(surface_elo.setdefault(surface, {}), winner_id, loser_id)
    update_elo(surface_elo['overall'], winner_id, loser_id)


def _player_label(player_id):
    """Return "<col 1> <col 2>" of the player's CSV row, or a placeholder if the id is unknown."""
    record = players.get(str(player_id))
    if record is None or len(record) < 3:
        return f"Unknown ({player_id})"
    return "{} {}".format(record[1], record[2])


# ---- One ratings table per surface -------------------------------------------
elo_dataframes = {}
for surface, players_elo in surface_elo.items():
    elo_dataframes[surface] = pd.DataFrame(
        [
            {"Player Name": _player_label(player_id), "Elo": elo}
            for player_id, elo in players_elo.items()
        ],
        # Explicit columns keep the schema stable even for a surface with no matches.
        columns=["Player Name", "Elo"],
    )

# ---- Report ------------------------------------------------------------------
# The loop variable is deliberately not called `df`, so the match frame above
# stays available after this cell has run.
for surface, ratings_df in elo_dataframes.items():
    print(f"{surface.capitalize()} Court Elo Ratings DataFrame:")
    print(ratings_df.head())

for surface, ratings_df in elo_dataframes.items():
    if not ratings_df.empty:
        print(f"{surface.capitalize()} Court Elo Ratings:")
        print(ratings_df.sort_values(by="Elo", ascending=False))
    else:
        print(f"No data for {surface.capitalize()} Court Elo Ratings")


Hard Court Elo Ratings DataFrame:
             Player Name          Elo
0         Novak Djokovic  1994.142766
1           Rafael Nadal  1690.652210
2  Roberto Bautista Agut  1610.977742
3          Dusan Lajovic  1489.836556
4        Daniil Medvedev  1835.243224
Clay Court Elo Ratings DataFrame:
         Player Name          Elo
0     Cristian Garin  1576.177609
1  Diego Schwartzman  1471.693966
2        Laslo Djere  1541.284090
3      Andrej Martin  1455.856245
4       Albert Ramos  1474.996999
Grass Court Elo Ratings DataFrame:
            Player Name          Elo
0  Aisam Ul Haq Qureshi  1514.624436
1         Nik Razborsek  1484.000000
2            Aqeel Khan  1511.585761
3           Blaz Kavcic  1484.000000
4      Denis Shapovalov  1550.477819
Unknown Court Elo Ratings DataFrame:
                    Player Name          Elo
0  Matheus Pucinelli De Almeida  1516.000000
1                     Rigele Te  1468.736307
2               Thiago Monteiro  1516.000000
3                       Ji

In [151]:
import os
import pandas as pd
import streamlit as st

# Current rating tables, one per surface key.
surface_elo = dict(hard={}, clay={}, grass={}, unknown={}, overall={})
default_elo = 1500
k_factor = 32

# Initialize Elo history storage: same surface keys, each with its own empty table
elo_timeseries = {surface_key: {} for surface_key in surface_elo}

def expected_score(rating_a, rating_b):
    """Expected score of the player rated ``rating_a`` against ``rating_b``.

    Computed as ``1 / (1 + 10 ** ((rating_b - rating_a) / 400))``, so two
    equally rated players each get 0.5.
    """
    exponent = (rating_b - rating_a) / 400
    denominator = 1 + 10 ** exponent
    return 1 / denominator

def update_elo(elo_dict, winner, loser, surface, match_date):
    """Apply one result to ``elo_dict`` and log both new ratings.

    ``elo_dict`` is updated in place (unseen players start at ``default_elo``),
    then one ``{"date", "elo"}`` entry per player is appended to
    ``elo_timeseries[surface]``.
    """
    old_winner = elo_dict.get(winner, default_elo)
    old_loser = elo_dict.get(loser, default_elo)

    # Points moving from loser to winner; larger when the win was unexpected.
    transfer = k_factor * (1 - expected_score(old_winner, old_loser))
    new_winner_rating = old_winner + transfer
    new_loser_rating = old_loser - transfer

    # Update Elo scores
    elo_dict[winner] = new_winner_rating
    elo_dict[loser] = new_loser_rating

    # Update timeseries (setdefault creates the player's list on first sight)
    history = elo_timeseries[surface]
    history.setdefault(winner, []).append({"date": match_date, "elo": new_winner_rating})
    history.setdefault(loser, []).append({"date": match_date, "elo": new_loser_rating})

def load_data(years, data_dir):
    """Read ``atp_matches_<year>.csv`` for each year and stack them into one frame.

    Parameters
    ----------
    years : iterable
        Values interpolated into the file name ``atp_matches_{year}.csv``.
    data_dir : str
        Directory that holds the yearly CSV files.

    Returns
    -------
    pandas.DataFrame
        All files that were found, concatenated in the order of ``years``
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If none of the requested files exists. (Previously this surfaced as
        pandas' generic "No objects to concatenate" ValueError.)
    """
    years = list(years)
    frames = []
    for year in years:
        file_path = os.path.join(data_dir, f"atp_matches_{year}.csv")
        if not os.path.exists(file_path):
            # A missing year is reported but does not abort the load.
            print(f"File not found: {file_path}")
            continue
        frames.append(pd.read_csv(file_path))

    if not frames:
        raise ValueError(
            f"No atp_matches_<year>.csv files found in {data_dir} for years {years}"
        )
    return pd.concat(frames, ignore_index=True)

years = [2020, 2021, 2022, 2023, 2024]
data_dir = "atp_data"

df = load_data(years, data_dir)

# Matches with no recorded surface are pooled under "unknown".
df['surface'] = df['surface'].fillna('unknown')

# Iterate over matches and update Elo
# NOTE(review): matches are replayed in the row order of the concatenated files.
# Elo is order-dependent, so confirm the CSVs are already chronological.
match_columns = zip(
    df['surface'].str.lower(),
    df['winner_id'],
    df['loser_id'],
    df['tourney_date'],  # Assuming 'tourney_date' is in the dataset
)
for surface, winner_id, loser_id, match_date in match_columns:
    # A surface that was not pre-declared gets its own tables instead of
    # aborting the run with a KeyError (update_elo indexes both dicts).
    surface_elo.setdefault(surface, {})
    elo_timeseries.setdefault(surface, {})

    update_elo(surface_elo[surface], winner_id, loser_id, surface, match_date)
    update_elo(surface_elo['overall'], winner_id, loser_id, "overall", match_date)

# Convert Elo timeseries into DataFrames
# The loop variable is not called `players`, so it cannot overwrite a player
# lookup of that name created by another cell.
elo_dataframes = {}
for surface, player_histories in elo_timeseries.items():
    timeseries_data = [
        {"player_id": player, "date": match["date"], "elo": match["elo"]}
        for player, matches in player_histories.items()
        for match in matches
    ]
    # Explicit columns keep the schema stable even for a surface with no matches.
    elo_dataframes[surface] = pd.DataFrame(
        timeseries_data, columns=["player_id", "date", "elo"]
    )

# Displaying example timeseries DataFrame
# (loop variable is not `df`, so the match frame loaded above is preserved)
for surface, timeseries_df in elo_dataframes.items():
    if not timeseries_df.empty:
        print(f"{surface.capitalize()} Court Elo Timeseries:")
        print(timeseries_df.head())

# Save to CSVs if needed
# for surface, timeseries_df in elo_dataframes.items():
#     timeseries_df.to_csv(f"{surface}_elo_timeseries.csv", index=False)


Hard Court Elo Timeseries:
   player_id      date          elo
0     104925  20200106  1516.000000
1     104925  20200106  1531.263693
2     104925  20200106  1545.827820
3     104925  20200106  1559.729526
4     104925  20200106  1573.005662
Clay Court Elo Timeseries:
   player_id      date          elo
0     106426  20200203  1516.000000
1     106426  20200203  1531.263693
2     106426  20200203  1545.827820
3     106426  20200203  1559.729526
4     106426  20200217  1573.009990
Grass Court Elo Timeseries:
   player_id      date          elo
0     103529  20200306  1516.000000
1     103529  20210305  1499.263693
2     103529  20220304  1515.297601
3     103529  20230916  1530.593576
4     103529  20240203  1514.624436
Unknown Court Elo Timeseries:
   player_id      date          elo
0     207799  20230203  1516.000000
1     144751  20230203  1484.000000
2     144751  20230203  1468.736307
3     106329  20230203  1516.000000
4     200666  20230203  1484.000000
Overall Court Elo Timese

In [None]:
import os
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt

# Initialize Elo dictionaries and default settings
surface_elo = dict(
    (surface_key, {}) for surface_key in ["hard", "clay", "grass", "unknown", "overall"]
)
default_elo, k_factor = 1500, 32

# Elo timeseries storage
elo_timeseries = dict(
    (surface_key, {}) for surface_key in ["hard", "clay", "grass", "unknown", "overall"]
)

# Player lookup: column 0 of atp_players.csv -> the full CSV row (list of strings).
# The context manager closes the file as soon as every row has been consumed;
# newline="" is what the csv module expects for files handed to csv.reader.
with open("atp_data/atp_players.csv", newline="") as pf:
    # `if row` skips completely blank lines, which csv.reader yields as [].
    players_id_to_name = OrderedDict(
        (row[0], row) for row in csv.reader(pf) if row
    )

# Define Elo calculation functions
def expected_score(rating_a, rating_b):
    """Return how much of one point player A is expected to score against B.

    The value lies between 0 and 1 and is exactly 0.5 when both ratings are
    equal; a 400-point deficit corresponds to a tenfold drop in odds.
    """
    deficit = rating_b - rating_a
    return 1 / (1 + 10 ** (deficit / 400))

def update_elo(elo_dict, winner, loser, surface, match_date):
    """Rate one match and record the outcome in the history store.

    Ratings are read from ``elo_dict`` (``default_elo`` for unseen players),
    shifted by ``k_factor`` times the winner's probability of losing, and
    written back. Each player's new rating is then appended, tagged with
    ``match_date``, to ``elo_timeseries[surface]``.
    """
    before_w = elo_dict.get(winner, default_elo)
    before_l = elo_dict.get(loser, default_elo)

    upset_weight = 1 - expected_score(before_w, before_l)
    new_winner_rating = before_w + k_factor * upset_weight
    new_loser_rating = before_l - k_factor * upset_weight

    elo_dict[winner] = new_winner_rating
    elo_dict[loser] = new_loser_rating

    # Update timeseries: winner first, then loser, creating lists on demand
    surface_history = elo_timeseries[surface]
    for player, rating in ((winner, new_winner_rating), (loser, new_loser_rating)):
        surface_history.setdefault(player, []).append({"date": match_date, "elo": rating})

# Load data
def load_data(years, data_dir):
    """Read ``atp_matches_<year>.csv`` for each year and stack them into one frame.

    Parameters
    ----------
    years : iterable
        Values interpolated into the file name ``atp_matches_{year}.csv``.
    data_dir : str
        Directory that holds the yearly CSV files.

    Returns
    -------
    pandas.DataFrame
        All files that were found, concatenated in the order of ``years``
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If none of the requested files exists. (Previously this surfaced as
        pandas' generic "No objects to concatenate" ValueError.)
    """
    years = list(years)
    frames = []
    for year in years:
        file_path = os.path.join(data_dir, f"atp_matches_{year}.csv")
        if not os.path.exists(file_path):
            # A missing year is reported but does not abort the load.
            print(f"File not found: {file_path}")
            continue
        frames.append(pd.read_csv(file_path))

    if not frames:
        raise ValueError(
            f"No atp_matches_<year>.csv files found in {data_dir} for years {years}"
        )
    return pd.concat(frames, ignore_index=True)

# Data directory and years
years = [2020, 2021, 2022, 2023, 2024]
data_dir = "atp_data"
df = load_data(years, data_dir)
# Matches with no recorded surface are pooled under "unknown".
df['surface'] = df['surface'].fillna('unknown')

# Process matches and update Elo
# NOTE(review): matches are replayed in the row order of the concatenated files.
# Elo is order-dependent, so confirm the CSVs are already chronological.
match_columns = zip(
    df['surface'].str.lower(),
    df['winner_id'],
    df['loser_id'],
    df['tourney_date'],  # Assuming 'tourney_date' is in the dataset
)
for surface, winner_id, loser_id, match_date in match_columns:
    # A surface that was not pre-declared gets its own tables instead of
    # aborting the run with a KeyError (update_elo indexes both dicts).
    surface_elo.setdefault(surface, {})
    elo_timeseries.setdefault(surface, {})

    update_elo(surface_elo[surface], winner_id, loser_id, surface, match_date)
    update_elo(surface_elo['overall'], winner_id, loser_id, "overall", match_date)

# Convert Elo timeseries into DataFrames
# The loop variable is not called `players`, so it cannot overwrite a player
# lookup of that name created by another cell.
elo_dataframes = {}
for surface, player_histories in elo_timeseries.items():
    timeseries_data = [
        {"player_id": player, "date": match["date"], "elo": match["elo"]}
        for player, matches in player_histories.items()
        for match in matches
    ]
    # Explicit columns: a surface with no matches still yields a frame that
    # can be filtered on 'player_id' without a KeyError.
    elo_dataframes[surface] = pd.DataFrame(
        timeseries_data, columns=["player_id", "date", "elo"]
    )

# Streamlit application
st.title("Tennis Player Elo Ratings Over Time")

# Map display names to player IDs for the dropdowns.
# The values of players_id_to_name are whole CSV rows (lists), which are not
# hashable, so the display name is built from columns 1 and 2 of each row.
# IDs are converted to int so they compare equal to the numeric 'player_id'
# values stored in the Elo frames.
# NOTE(review): two players with the same display name collapse to one entry
# (the later row wins) — confirm whether that matters for this data.
player_names = {}
for raw_id, record in players_id_to_name.items():
    if not raw_id.isdigit() or len(record) < 3:
        continue  # header row or malformed line
    display_name = "{} {}".format(record[1], record[2]).strip()
    if display_name:
        player_names[display_name] = int(raw_id)


def _player_history(surface_df, player_id):
    """Return one player's rows from a surface frame, oldest first, with datetime dates."""
    rows = surface_df[surface_df['player_id'] == player_id]
    if rows.empty:
        return rows
    # Several matches can share one date; a stable sort keeps them in the
    # order they were appended, so the plotted line follows rating order.
    rows = rows.sort_values(by="date", kind="mergesort")
    # Dates are stored as YYYYMMDD numbers; convert so the x-axis is a real
    # time scale rather than a numeric one with artificial gaps.
    return rows.assign(date=pd.to_datetime(rows['date'].astype(str), format="%Y%m%d"))


# Player selection by name
name_options = list(player_names)
player1_name = st.selectbox("Select Player 1:", name_options, key="player1")
player2_name = st.selectbox("Select Player 2:", name_options, key="player2")

if player1_name and player2_name:
    player1_id = player_names[player1_name]
    player2_id = player_names[player2_name]

    st.write(f"### Elo Ratings Time Series: {player1_name} vs {player2_name}")

    for surface in ["overall", "hard", "clay", "grass"]:
        surface_df = elo_dataframes.get(surface)
        # A missing or empty frame may lack the 'player_id' column entirely,
        # so it is handled before any filtering.
        if surface_df is None or surface_df.empty:
            st.write(f"No data available for {surface.capitalize()} Court Elo Ratings.")
            continue

        # Filter data for selected players (already sorted by date)
        player1_data = _player_history(surface_df, player1_id)
        player2_data = _player_history(surface_df, player2_id)

        if player1_data.empty and player2_data.empty:
            st.write(f"No data available for {surface.capitalize()} Court Elo Ratings.")
            continue

        # Plot time series
        fig, ax = plt.subplots()
        if not player1_data.empty:
            ax.plot(player1_data['date'], player1_data['elo'], label=player1_name, marker='o')
        if not player2_data.empty:
            ax.plot(player2_data['date'], player2_data['elo'], label=player2_name, marker='o')

        # Customize plot
        ax.set_title(f"{surface.capitalize()} Court Elo Ratings Over Time")
        ax.set_xlabel("Date")
        ax.set_ylabel("Elo Rating")
        ax.legend()
        ax.grid()
        fig.autofmt_xdate()

        # Render plot in Streamlit, then release the figure so repeated
        # reruns do not accumulate open matplotlib figures.
        st.pyplot(fig)
        plt.close(fig)
