# Team Ratings

In [9]:
# Local libraries
import Tools.ratings_utils as ru

# Input file passed to ru.set_massey_rating_data_frame (path is relative to the working directory)
FILENAME = "Data/data_2024.json"
# Input file passed to ru.simulate_tournament (path is relative to the working directory)
TOURNAMENT_FILENAME = "Data/tournament_2024.csv"

## Massey Ratings

In [11]:
# Build the score data frame from FILENAME, then compute Massey ratings from it.
score_df = ru.set_massey_rating_data_frame(filename=FILENAME)
massey_ratings = ru.calculate_massey_ratings(
    score_df=score_df,
    debug=False,
)

# Run the tournament in TOURNAMENT_FILENAME against those ratings; the call
# prints the number of correct picks per round and in total.
ru.simulate_tournament(
    filename=TOURNAMENT_FILENAME,
    ratings=massey_ratings,
)

Round: 1 / Round of 64 - Correct picks: 22 out of 32
Round: 2 / Round of 32 - Correct picks: 12 out of 16
Round: 3 / Sweet 16 - Correct picks: 4 out of 8
Round: 4 / Elite 8 - Correct picks: 2 out of 4
Round: 5 / Final 4 - Correct picks: 0 out of 2
Round: 6 / Championship - Correct picks: 0 out of 1

Total correct picks in tournament: 40 out of 63


## Colley Ratings

In [16]:
import numpy as np
import pandas as pd

def calculate_colley_ratings(games, teams, debug: bool=False):
    """Calculates Colley ratings from a table of game results.

    Builds the linear system ``C r = b`` and solves it for the ratings ``r``.
    ``C`` starts as ``2 * I`` and ``b`` as all ones. Each game adds 1 to both
    teams' diagonal entries of ``C``, subtracts 1 from the two off-diagonal
    entries linking them, and shifts ``b`` by +0.5 for the winner and -0.5
    for the loser.

    Args:
        games: DataFrame with columns ["Team1", "Team2", "Winner"], one row
            per game. "Winner" must equal that row's "Team1" or "Team2".
        teams: List of unique teams; every team named in ``games`` must
            appear in it.
        debug (bool): flag to print the teams ranked by rating

    Returns:
        colley_ratings: dict mapping each team to its rating, keyed in the
            same order as ``teams``

    Raises:
        KeyError: if a game names a team that is not in ``teams``.
        ValueError: if a game's "Winner" is neither of its two teams.
    """

    n = len(teams)  # Number of teams
    team_index = {team: i for i, team in enumerate(teams)}  # Map teams to indices

    # Initialize Colley matrix (C) and RHS vector (b)
    C = np.eye(n) * 2  # Start with 2 on the diagonal
    b = np.ones(n)  # Initialize b with 1s

    # Populate matrix and vector using game results.
    # Plain record dicts avoid building a Series per row (as iterrows does)
    # and still yield nothing for an empty frame.
    for game in games.to_dict(orient="records"):
        t1, t2, winner = game["Team1"], game["Team2"], game["Winner"]
        i, j = team_index[t1], team_index[t2]

        # A winner that matches neither side (typo, NaN, third team) must not
        # be silently counted as a Team2 win.
        if winner != t1 and winner != t2:
            raise ValueError(
                f"Winner {winner!r} is neither Team1 {t1!r} nor Team2 {t2!r}"
            )

        # Update matrix
        C[i, i] += 1  # Each team gets an additional game played
        C[j, j] += 1
        C[i, j] -= 1
        C[j, i] -= 1

        # Update b vector: +0.5 for the winner, -0.5 for the loser
        if winner == t1:
            b[i] += 0.5
            b[j] -= 0.5
        else:
            b[i] -= 0.5
            b[j] += 0.5

    # Solve for ratings
    ratings = np.linalg.solve(C, b)

    # Convert ratings to a dictionary
    colley_ratings = dict(zip(teams, ratings))

    if debug:
        # Rank only when it will be printed; the returned dict stays in `teams` order.
        colley_rankings = sorted(colley_ratings.items(), key=lambda x: x[1], reverse=True)
        for rank, (team, rating) in enumerate(colley_rankings, 1):
            print(f"{rank}. {team}: {rating:.2f}")

    return colley_ratings

# Example Usage
games = pd.DataFrame([
    {"Team1": "Kansas", "Team2": "Duke", "Winner": "Kansas"},
    {"Team1": "Kansas", "Team2": "Arkansas", "Winner": "Arkansas"},
    {"Team1": "Duke", "Team2": "Arkansas", "Winner": "Duke"},
    {"Team1": "Kansas", "Team2": "Arkansas", "Winner": "Kansas"}
])

# Sort the team names so the rating dict has the same key order on every run;
# iterating a bare set of strings varies with Python's per-process hash seed.
teams = sorted(set(games["Team1"]).union(games["Team2"]))

colley_ratings = calculate_colley_ratings(games=games,
                                          teams=teams,
                                          debug=True)
print(colley_ratings)


1. Kansas: 0.57
2. Duke: 0.50
3. Arkansas: 0.43
{'Duke': 0.5, 'Kansas': 0.5714285714285715, 'Arkansas': 0.4285714285714286}


## Test code

In [None]:
# Currently unused (save for Massey, Colley, Elo Ratings)
# NOTE(review): neither `Tournament` nor `TOURNAMENT_URL` is imported or defined
# in the cells visible in this notebook, so this cell presumably raises NameError
# on a fresh kernel — confirm, and add the missing import/constant before running.
tournament = Tournament.Tournament(url=TOURNAMENT_URL,
                                   debug=True)

In [None]:
# # Check results manually: write tourney_dict (column name -> sequence of values) to teams.csv
# import csv

# # Convert dictionary to a CSV-friendly format
# with open("teams.csv", mode="w", newline="") as file:
#     writer = csv.writer(file)

#     keys = list(tourney_dict.keys())
#     writer.writerow(keys)  # Header

#     for i in range(len(tourney_dict[keys[0]])):
#         row = []
#         for key in keys:
#             row.append(tourney_dict[key][i])
#         writer.writerow(row)  # Combine team name with stats