# Team Ratings

In [18]:
# Third party libraries
import pandas as pd
import numpy as np

# Local libraries
import Tools.json_utils as ju

# Path of the JSON stats file that the stats-reading cell loads with pd.read_json.
FILENAME = "Data/data_2024.json"

In [None]:
# Currently unused (save for Massey, Colley, Elo Ratings)
# NOTE(review): neither `Tournament` nor `TOURNAMENT_URL` is imported or defined
# in the visible cells, so this cell raises NameError on a fresh kernel unless
# they are provided elsewhere -- confirm before running.
tournament = Tournament.Tournament(url=TOURNAMENT_URL,
                                   debug=True)

In [70]:
# Column layout of the game-score table: one (initially empty) list per column
score_df = {column: [] for column in ("Home", "Home_Score", "Away", "Away_Score")}
print(score_df)

{'Home': [], 'Home_Score': [], 'Away': [], 'Away_Score': []}


In [71]:

# Read Stats
teams_df = pd.read_json(FILENAME)

team = "Kansas"

# Per-team record: each field ("Type", "Site", "Opponent", ...) holds one list
# with an entry per game.
team_df = teams_df[team]

# Rows are collected in a list that is created inside this cell, so re-running
# the cell always rebuilds the table from scratch instead of appending to (or
# failing on) whatever `score_df` was left over from a previous run.
score_rows = []

for i in range(len(team_df["Type"])):

    # Non Tournament games only
    if team_df["Type"][i] in ("NCAA", "CIT"):
        continue

    # Find which team is home/away (None = home, @ = away, anything else,
    # e.g. N = neutral: the winner is treated as the home team)
    site = team_df["Site"][i]
    if site is None:
        team_is_home = True
    elif site == "@":
        team_is_home = False
    else:
        team_is_home = team_df["Outcome"][i] == "W"

    opponent = team_df["Opponent"][i]
    team_score = int(team_df["Tm"][i])
    opponent_score = int(team_df["Opp"][i])

    # Row layout: (Home, Home_Score, Away, Away_Score)
    if team_is_home:
        score_rows.append((team, team_score, opponent, opponent_score))
    else:
        score_rows.append((opponent, opponent_score, team, team_score))

score_df = pd.DataFrame(score_rows,
                        columns=["Home", "Home_Score", "Away", "Away_Score"])
print(score_df)

G           [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
Date        [Mon, Nov 6, 2023, Fri, Nov 10, 2023, Tue, Nov...
Time        [8:00p, 8:00p, 9:30p, 9:00p, 10:30p, 2:30p, 8:...
Type        [REG, REG, REG, REG, REG, REG, REG, REG, REG, ...
Site        [None, None, N, N, N, N, None, None, None, Non...
Opponent    [North Carolina Central, Manhattan, Kentucky, ...
Conf        [MEAC, MAAC, SEC, None, Big East, SEC, OVC, Bi...
SRS         [-7.88, -15.28, 16.79, None, 19.27, 21.81, -11...
Outcome     [W, W, W, W, L, W, W, W, W, W, W, W, W, W, L, ...
Tm          [99, 99, 89, 83, 59, 69, 71, 69, 88, 73, 75, 7...
Opp         [56, 61, 84, 56, 73, 60, 63, 65, 69, 64, 71, 6...
OT          [None, None, None, None, None, None, None, Non...
W           [1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
L           [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...
Streak      [W 1, W 2, W 3, W 4, L 1, W 1, W 2, W 3, W 4, ...
Arena       [Allen Fieldhouse, Allen Fieldhouse, United Ce...
Name: Ka

In [72]:
# Get unique teams and index them.
# The names are sorted so every team gets the same index on every kernel run;
# iterating a bare set of strings can change order between Python processes
# (string hash randomization), which would reshuffle rows and tied rankings.
teams = sorted(set(score_df["Home"]) | set(score_df["Away"]))
team_index = {team: i for i, team in enumerate(teams)}
N = len(teams)

# Initialize Massey matrix and score vector
M = np.zeros((N, N))
b = np.zeros(N)

# Fill the matrix and score vector: each game adds 1 to both teams' diagonal
# entries, subtracts 1 from the two off-diagonal entries linking them, and
# adds the home margin to the home team's entry of b (subtracting it from the
# away team's entry).
for home, away, home_score, away_score in zip(
    score_df["Home"], score_df["Away"],
    score_df["Home_Score"], score_df["Away_Score"],
):
    h, a = team_index[home], team_index[away]
    home_margin = home_score - away_score

    M[h, h] += 1
    M[a, a] += 1
    M[h, a] -= 1
    M[a, h] -= 1

    b[h] += home_margin
    b[a] -= home_margin

# Every row built above sums to zero, so M is singular as constructed.
# Replace last row to enforce sum constraint (ratings sum to 0), which makes
# the system solvable.
M[-1, :] = 1
b[-1] = 0

In [73]:
# Solve M r = b; ratings[i] belongs to teams[i]
ratings = np.linalg.solve(M, b)

# Convert ratings to a dictionary
massey_ratings = dict(zip(teams, ratings))

# Sort and display rankings (highest rating first)
massey_rankings = sorted(massey_ratings.items(), key=lambda item: item[1], reverse=True)

# The loop variable is intentionally not named `team`: a top-level loop would
# rebind the notebook-level `team` chosen in the stats-reading cell.
for rank, (team_name, rating) in enumerate(massey_rankings, 1):
    print(f"{rank}. {team_name}: {rating:.2f}")

1. Texas Tech: 35.76
2. Marquette: 20.76
3. Houston: 15.26
4. Brigham Young: 14.76
5. Cincinnati: 14.26
6. West Virginia: 12.76
7. UCF: 11.76
8. Iowa State: 10.76
9. Baylor: 9.26
10. Kansas: 6.76
11. TCU: 4.76
12. Indiana: 2.76
13. Connecticut: 2.76
14. Kentucky: 1.76
15. Eastern Illinois: -1.24
16. Kansas State: -1.74
17. Missouri: -2.24
18. Tennessee: -2.24
19. Oklahoma: -4.24
20. Yale: -8.24
21. Texas: -12.24
22. Wichita State: -12.24
23. Kansas City: -12.24
24. Oklahoma State: -19.74
25. Chaminade: -20.24
26. Manhattan: -31.24
27. North Carolina Central: -36.24


# Sample Code

In [12]:
# Example dataset: three games among three teams, one row per game
df = pd.DataFrame(
    [
        ("Kansas", 78, "Duke", 75),
        ("Arkansas", 70, "Kansas", 68),
        ("Duke", 65, "Arkansas", 52),
    ],
    columns=["Home", "Home_Score", "Away", "Away_Score"],
)

In [13]:
# Get unique teams and index them.
# The names are sorted so every team gets the same index on every kernel run;
# iterating a bare set of strings can change order between Python processes
# (string hash randomization), which would reshuffle rows and tied rankings.
teams = sorted(set(df["Home"]) | set(df["Away"]))
team_index = {team: i for i, team in enumerate(teams)}
N = len(teams)

# Initialize Massey matrix and score vector
M = np.zeros((N, N))
b = np.zeros(N)

# Fill the matrix and score vector: each game adds 1 to both teams' diagonal
# entries, subtracts 1 from the two off-diagonal entries linking them, and
# adds the home margin to the home team's entry of b (subtracting it from the
# away team's entry).
for home, away, home_score, away_score in zip(
    df["Home"], df["Away"], df["Home_Score"], df["Away_Score"]
):
    h, a = team_index[home], team_index[away]
    home_margin = home_score - away_score

    M[h, h] += 1
    M[a, a] += 1
    M[h, a] -= 1
    M[a, h] -= 1

    b[h] += home_margin
    b[a] -= home_margin

# Every row built above sums to zero, so M is singular as constructed.
# Replace last row to enforce sum constraint (ratings sum to 0), which makes
# the system solvable.
M[-1, :] = 1
b[-1] = 0

In [14]:
# Solve M r = b; ratings[i] belongs to teams[i]
ratings = np.linalg.solve(M, b)

# Convert ratings to a dictionary
massey_ratings = dict(zip(teams, ratings))

# Sort and display rankings (highest rating first)
massey_rankings = sorted(massey_ratings.items(), key=lambda item: item[1], reverse=True)

# The loop variable is intentionally not named `team`: a top-level loop would
# rebind the notebook-level `team` chosen in the stats-reading cell.
for rank, (team_name, rating) in enumerate(massey_rankings, 1):
    print(f"{rank}. {team_name}: {rating:.2f}")

1. Duke: 3.33
2. Kansas: 0.33
3. Arkansas: -3.67
