In [3]:
import datetime as dt
import numpy as np
import pandas as pd

In [4]:
def clean_results_data(season, return_data = False, write_data = True):
    """Strip completely empty rows from a season's results CSV.

    The file name is derived from two slices of ``season``: ``season[2:4]``
    and ``season[5:]`` (for example ``'2018-19'`` resolves to
    ``./Data/results_data/premier_league_18_19_results.csv``).

    Parameters
    ----------
    season : str
        Season label used to build the CSV path.
    return_data : bool, default False
        When True, the cleaned DataFrame is returned; otherwise None.
    write_data : bool, default True
        When True, the cleaned data overwrites the CSV it was read from.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned frame (original row index retained) if ``return_data``
        is True, else None.
    """
    csv_path = './Data/results_data/premier_league_{0}_{1}_results.csv'.format(season[2:4], season[5:])

    raw_results = pd.read_csv(csv_path)

    # A row survives if at least one of its cells holds a value
    row_has_value = raw_results.notna().any(axis=1)
    cleaned_results = raw_results.loc[row_has_value, :]

    # Persist the cleaned data over the source file when asked to
    if write_data:
        cleaned_results.to_csv(csv_path, index=False)

    return cleaned_results if return_data else None

In [9]:
def update_table_dict(table_dict, results):
    """Add the outcome of every fixture in ``results`` to a league table.

    Parameters
    ----------
    table_dict : dict
        Maps each team name to a dict holding at least the numeric keys
        'Played', 'GF', 'GA', 'GD' and 'Points'. It is updated in place.
    results : pandas.DataFrame
        One row per fixture with the columns 'Home', 'Away' and 'Score'.
        'Score' is a string of the form '<home goals><sep><away goals>',
        where the separator is an en dash (U+2013) or an ASCII hyphen.

    Returns
    -------
    dict
        The same ``table_dict`` object, after the updates.

    Raises
    ------
    ValueError
        If a score has no separator or either side is not an integer.
    KeyError
        If a team in ``results`` is missing from ``table_dict``.
    """
    en_dash = u'\u2013'

    for i in results.index:
        home_team = results.loc[i, 'Home']
        away_team = results.loc[i, 'Away']
        score = results.loc[i, 'Score']

        # Normalise the en dash to a hyphen so both separators are accepted.
        # A missing separator is reported explicitly instead of being sliced
        # with str.find's -1 sentinel, which silently misreads the score.
        home_text, separator, away_text = score.replace(en_dash, '-').partition('-')
        if not separator:
            raise ValueError(
                'Cannot parse score {0!r} for {1} v {2}: expected "<home>-<away>"'.format(
                    score, home_team, away_team
                )
            )
        home_goals = int(home_text)
        away_goals = int(away_text)

        # Allocate points based on the scoreline
        if home_goals > away_goals:
            home_points, away_points = 3, 0
        elif home_goals < away_goals:
            home_points, away_points = 0, 3
        else:
            home_points, away_points = 1, 1

        # Per-fixture increments for each side
        home_dict = {
            'Played': 1,
            'GF': home_goals,
            'GA': away_goals,
            'GD': home_goals - away_goals,
            'Points': home_points,
        }
        away_dict = {
            'Played': 1,
            'GF': away_goals,
            'GA': home_goals,
            'GD': away_goals - home_goals,
            'Points': away_points,
        }

        # Fold the increments into the running totals, each team using its
        # own increment dict
        for team, increments in ((home_team, home_dict), (away_team, away_dict)):
            team_row = table_dict[team]
            team_row.update({key: team_row[key] + increments[key] for key in increments})

    # Return the updated dict
    return table_dict

In [5]:
def create_historical_league_tables(season, results_df):
    """Build a league-table snapshot after every distinct kickoff time.

    Parameters
    ----------
    season : str
        Season label; its first four characters are parsed as the start year.
    results_df : pandas.DataFrame
        Fixture results with the columns 'Date' and 'Time' (which, once
        converted to strings, must match '%Y-%m-%d' and '%H:%M'), plus
        'Home', 'Away' and 'Score' as consumed by ``update_table_dict``.
        NOTE: a 'Datetime' column holding the combined kickoff time is
        added to this frame in place.

    Returns
    -------
    list of dict
        A flat list of records with the keys 'Datetime', 'Team', 'Played',
        'GF', 'GA', 'GD' and 'Points'. The first block (one record per club,
        in alphabetical order) is an all-zero table dated 1 January of the
        start year; it is followed by one block per unique kickoff time in
        chronological order, each holding the cumulative table after the
        fixtures that kicked off at that time.
    """
    start_year = int(season[0:4])

    # Combine the date and time columns into a single kickoff timestamp
    kickoff_strings = results_df['Date'].astype(str) + ' ' + results_df['Time'].astype(str)
    results_df['Datetime'] = pd.to_datetime(kickoff_strings, format='%Y-%m-%d %H:%M')

    # Distinct kickoff times, earliest first
    datetimes_unique = results_df['Datetime'].sort_values().unique()

    # Every club that appears as either a home or an away side
    clubs = sorted(set(results_df['Home']).union(results_df['Away']))

    stat_columns = ['Played', 'GF', 'GA', 'GD', 'Points']
    initial_datetime = dt.datetime(start_year, 1, 1)

    # Running totals per club; this is the state carried between kickoffs
    current_table = {club: dict.fromkeys(stat_columns, 0) for club in clubs}

    def snapshot(timestamp, table):
        # One fresh record per club so stored rows never alias the running table
        records = []
        for club in clubs:
            record = {'Datetime': timestamp, 'Team': club}
            record.update((column, table[club][column]) for column in stat_columns)
            records.append(record)
        return records

    league_table_data = snapshot(initial_datetime, current_table)

    for kickoff in datetimes_unique:
        # Use a pandas Timestamp rather than a raw numpy.datetime64
        kickoff = pd.Timestamp(kickoff)
        results = results_df.loc[results_df['Datetime'] == kickoff, :]

        # Hand update_table_dict its own copy of the running totals
        working_table = {club: dict(stats) for club, stats in current_table.items()}
        current_table = update_table_dict(working_table, results)

        # extend (not append) keeps the output a flat list of records
        league_table_data.extend(snapshot(kickoff, current_table))

    # Return the finished league table list
    return league_table_data