In [19]:
import pandas as pd
import numpy as np


In [20]:
# Basic position ratings

def basic_position_ratings (train_data):
  """Rate every team by the reciprocal of its team number.

  Args:
    train_data: DataFrame with 'home_team_no' and 'away_team_no' columns.

  Returns:
    List of (team_no, rating) tuples in ascending team-number order, where
    rating is 1 / team_no (so a lower team number gets a higher rating).
  """
  # Stack both fixture columns so a team is picked up whichever side it played on.
  fixture_sides = [train_data['home_team_no'], train_data['away_team_no']]
  team_numbers = pd.concat(fixture_sides).drop_duplicates().sort_values()

  # Element-wise reciprocal of the sorted, de-duplicated team numbers.
  reciprocals = 1 / team_numbers

  return list(zip(team_numbers, reciprocals))

In [21]:
# Massey Rating functions

def massey_ratings (train_data):
  """Compute Massey ratings from match results.

  Builds the Massey system ``X^T X r = X^T Y``: each row of ``X`` holds +1 in
  the winner's column and -1 in the loser's column for one match, and ``Y``
  holds the winner's score margin for that match. The last equation is
  replaced by ``sum(r) = 0`` before solving.

  Team columns are addressed by each team's position in the sorted list of
  team numbers, so team numbers do not have to be the contiguous range 1..N.

  Args:
    train_data: DataFrame with columns 'home_team_no', 'away_team_no',
      'home_team_score', 'away_team_score', 'actual_winning_team_no' and
      'actual_losing_team_no'.

  Returns:
    List of (team_no, rating) tuples in ascending team-number order; an empty
    list when ``train_data`` has no rows.

  Raises:
    KeyError: if a winning or losing team number never appears in the
      'home_team_no' / 'away_team_no' columns.
    numpy.linalg.LinAlgError: if the resulting system is singular (for
      example when the matches do not link all teams together).
  """
  # Positional row labels so that frame row i is row i of X and Y.
  matches = train_data.reset_index(drop=True)
  number_of_matches = len(matches)
  if number_of_matches == 0:
    return []

  all_teams = pd.concat([matches['home_team_no'], matches['away_team_no']]).drop_duplicates().sort_values()
  number_of_teams = len(all_teams)

  # team number -> column position; this is the same order the ratings are
  # paired with team numbers on return, so columns and labels always agree.
  team_position = {team_no: position for position, team_no in enumerate(all_teams)}

  X = np.zeros([number_of_matches, number_of_teams]).astype(int)  # who beat whom
  Y = np.zeros([number_of_matches]).astype(int)                   # winner's score margin

  for i, row in matches.iterrows():
    winning_team_no = row['actual_winning_team_no']
    losing_team_no = row['actual_losing_team_no']

    X[i, team_position[winning_team_no]] = 1
    X[i, team_position[losing_team_no]] = -1

    # Margin is always taken from the winner's point of view.
    if winning_team_no == row['home_team_no']:
      Y[i] = row['home_team_score'] - row['away_team_score']
    else:
      Y[i] = row['away_team_score'] - row['home_team_score']

  M = X.transpose().dot(X)
  p = X.transpose().dot(Y)

  # X^T X is rank deficient on its own; swap the last equation for the
  # constraint that all ratings sum to zero.
  M[-1, :] = 1
  p[-1] = 0

  r = np.linalg.solve(M, p)

  return list(zip(all_teams, r))


In [22]:
# Colley Rating functions

def colley_ratings (train_data):
    """Compute Colley ratings from match results.

    Solves ``C r = b`` where ``C[i, i] = 2 + games played by team i``,
    ``C[i, j] = -(games between teams i and j)`` and
    ``b[i] = 1 + (wins_i - losses_i) / 2``. Game counts are sums of the
    'match_count' column; wins and losses come from comparing
    'home_team_score' with 'away_team_score', so a match with level scores
    counts as a game played but as neither a win nor a loss.

    Prints the number of rows in ``train_data`` as a side effect.

    Args:
        train_data: DataFrame with columns 'home_team_no', 'away_team_no',
            'home_team_score', 'away_team_score' and 'match_count'.

    Returns:
        List of (team_no, rating) tuples in ascending team-number order.
    """
    print(len(train_data))

    home_numbers = train_data['home_team_no']
    away_numbers = train_data['away_team_no']
    team_numbers = pd.concat([home_numbers, away_numbers]).drop_duplicates().sort_values()
    team_count = len(team_numbers)

    colley_matrix = np.zeros((team_count, team_count)).astype(int)
    right_hand_side = np.zeros(team_count)

    for row_pos, team in enumerate(team_numbers):
        # Every match this team took part in, on either side.
        own_games = train_data[home_numbers.isin([team]) | away_numbers.isin([team])]
        own_home = own_games['home_team_no']
        own_away = own_games['away_team_no']

        # Diagonal: 2 + total games played.
        colley_matrix[row_pos, row_pos] = 2 + own_games['match_count'].sum()

        # Off-diagonal: minus the number of meetings with each other team.
        for col_pos, opponent in enumerate(team_numbers):
            if team == opponent:
                continue
            against = own_home.isin([opponent]) | own_away.isin([opponent])
            shared_games = own_games[against]['match_count'].sum()
            if shared_games > 0:
                colley_matrix[row_pos, col_pos] = -shared_games

        # Win/loss tally from this team's point of view; level scores add to neither.
        team_at_home = own_home == team
        home_side_won = own_games['home_team_score'] > own_games['away_team_score']
        away_side_won = own_games['away_team_score'] > own_games['home_team_score']
        wins = (team_at_home & home_side_won).sum() + (~team_at_home & away_side_won).sum()
        losses = (team_at_home & away_side_won).sum() + (~team_at_home & home_side_won).sum()

        right_hand_side[row_pos] = 1 + (wins - losses) / 2

    solution = np.linalg.solve(colley_matrix, right_hand_side)
    return list(zip(team_numbers, solution))

In [23]:
# Keener Rating functions

def keener_ratings (train_data):
  """Compute Keener ratings from match scores.

  Steps:
    1. Build a score matrix ``A`` where ``A[i, j]`` is the total score team i
       recorded against team j.
    2. Smooth it with Laplace's rule of succession:
       ``(A[i, j] + 1) / (A[i, j] + A[j, i] + 2)``.
    3. Apply the skewing function
       ``0.5 + sign(x - 0.5) * sqrt(|2x - 1|) / 2``.
    4. Take the eigenvector belonging to the eigenvalue of largest magnitude;
       the absolute values of its components are the ratings.

  Rows and columns of ``A`` are addressed by each team's position in the
  sorted list of team numbers, so team numbers do not have to be the
  contiguous range 1..N.

  Args:
    train_data: DataFrame with columns 'home_team_no', 'away_team_no',
      'home_team_score' and 'away_team_score'.

  Returns:
    List of (team_no, rating) tuples in ascending team-number order; an empty
    list when ``train_data`` contains no teams. Ratings are components of the
    eigenvector as returned by ``numpy.linalg.eig`` and are not rescaled.
  """
  all_teams = pd.concat([train_data['home_team_no'], train_data['away_team_no']]).drop_duplicates().sort_values().reset_index(drop=True)
  number_of_teams = len(all_teams)
  if number_of_teams == 0:
    return []

  # team number -> matrix position; the same order the ratings are paired
  # with team numbers on return.
  team_position = {team_no: position for position, team_no in enumerate(all_teams)}

  # A[i, j] accumulates what team i scored against team j.
  A = np.zeros((number_of_teams, number_of_teams)).astype(int)

  fixtures = zip(
      train_data['home_team_no'],
      train_data['away_team_no'],
      train_data['home_team_score'],
      train_data['away_team_score'],
  )
  for home_team_no, away_team_no, home_team_score, away_team_score in fixtures:
    # A team listed against itself carries no head-to-head information.
    if home_team_no == away_team_no:
      continue
    home_index = team_position[home_team_no]
    away_index = team_position[away_team_no]
    A[home_index, away_index] += home_team_score
    A[away_index, home_index] += away_team_score

  # Laplace's rule of succession; pairs that never met (and the diagonal)
  # come out at exactly 0.5.
  strength_A = (A + 1) / (A + A.T + 2)

  # Skewing pushes values away from 0.5 so that near-even results are told
  # apart more sharply while 0, 0.5 and 1 stay fixed.
  skew_A = 0.5 + ((np.sign(strength_A - 0.5) * np.sqrt(np.abs(2 * strength_A - 1))) / 2)

  eigenvalues, eigenvectors = np.linalg.eig(skew_A)

  # Column of the eigenvalue with the largest absolute value.
  dominant = np.argsort(np.abs(eigenvalues))[::-1][0]

  # Eigenvectors are only defined up to sign (and may be returned as complex
  # numbers), so take magnitudes.
  r = abs(eigenvectors[:, dominant])

  return list(zip(all_teams, r))

In [24]:
def trueskill_ratings (train_data):
    """Compute TrueSkill ratings by replaying matches in date order.

    Matches are processed in ascending 'match_date' order. Each team starts
    from a fresh ``Rating()`` the first time it appears, and both teams'
    ratings are updated with ``rate_1vs1`` after every match. Any match whose
    'actual_winning_team_no' is not the home team is treated as an away win;
    draws are not modelled.

    Args:
        train_data: DataFrame with columns 'match_date', 'home_team_no',
            'away_team_no' and 'actual_winning_team_no'.

    Returns:
        List of (team_no, mu) tuples, ordered by each team's first appearance
        in the date-sorted matches (home team before away team).
    """
    from trueskill import Rating, rate_1vs1

    chronological = train_data.sort_values('match_date', ascending=True)

    # team_no -> latest Rating. Dict insertion order records first appearance.
    rating_by_team = {}

    for _, fixture in chronological.iterrows():
        home_no = fixture['home_team_no']
        away_no = fixture['away_team_no']

        # Teams not seen yet start from the library's default rating.
        home_rating = rating_by_team.get(home_no, Rating())
        away_rating = rating_by_team.get(away_no, Rating())

        # rate_1vs1 takes (winner, loser) and returns them in the same order.
        if fixture['actual_winning_team_no'] == home_no:
            home_rating, away_rating = rate_1vs1(home_rating, away_rating)
        else:
            away_rating, home_rating = rate_1vs1(away_rating, home_rating)

        rating_by_team[home_no] = home_rating
        rating_by_team[away_no] = away_rating

    return [(team_no, rating.mu) for team_no, rating in rating_by_team.items()]