In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from collections import defaultdict
from joblib import Parallel, delayed

import sqlite3
import sys
import time
import math
import tqdm
import datetime
import os
import pickle
from pathlib import Path

from glicko2 import Player

# Prefer the '/workspace/data' directory when it exists; otherwise fall back
# to the repository-relative data directory.
data_path = '/workspace/data/' if os.path.exists('/workspace/data') else '../data/'

## Loading SQLite Database into Pandas DataFrames

The following code connects to an SQLite database (`melee_player_database.db`) and converts each table within the database into a pandas DataFrame. The DataFrames will be stored in a dictionary, where each key corresponds to the table name with `_df` appended, and the values are the respective DataFrames.

### Steps:

1. **Database Connection**: We use the `sqlite3` library to connect to the SQLite database file.
2. **Retrieve Table Names**: A query retrieves all the table names in the database.
3. **Convert Tables to DataFrames**: For each table:
   - The table is loaded into a pandas DataFrame using `pd.read_sql()`.
   - We check each column to see if any data is JSON-formatted (lists or dictionaries). If so, we convert these columns from strings into their corresponding Python objects using `json.loads()`.
4. **Store DataFrames**: The DataFrames are stored in a dictionary, where the key is the table name with a `_df` suffix, and the value is the DataFrame.
5. **Database Connection Closed**: Once all tables are loaded into DataFrames, the database connection is closed.

### Example:
If the database contains a table named `players`, the corresponding DataFrame will be stored in the dictionary with the key `players_df`, and can be accessed as:

```python
players_df = dfs['players_df']
```

In [4]:
# Function to get the table names
def get_table_names(conn):
    """Return the names of all tables in the SQLite database behind ``conn``.

    Args:
        conn: An open ``sqlite3`` connection.

    Returns:
        list[str]: The ``name`` values of every ``sqlite_master`` row whose
        type is ``'table'``.
    """
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    return pd.read_sql(query, conn)['name'].tolist()


def _quote_identifier(name):
    """Quote ``name`` as an SQLite identifier (embedded double quotes are doubled)."""
    return '"' + name.replace('"', '""') + '"'


# Function to load tables into DataFrames
def load_tables_to_dfs(conn):
    """Load every table of the database into a pandas DataFrame.

    Columns in which every value is a string are passed through
    ``json.loads``; if any value of such a column is not valid JSON the
    column is left untouched.

    Args:
        conn: An open ``sqlite3`` connection.

    Returns:
        dict[str, pd.DataFrame]: Maps ``"<table>_df"`` to the table's contents.
    """
    dataframes = {}

    for table in get_table_names(conn):
        # Quote the table name so that names which are SQL keywords or contain
        # spaces/quotes (e.g. "order") do not break or alter the query.
        df = pd.read_sql(f"SELECT * FROM {_quote_identifier(table)}", conn)

        # Detect and convert JSON formatted columns (if any)
        for col in df.columns:
            values = df[col]

            # Only columns made up entirely of strings are JSON candidates.
            if not values.apply(lambda x: isinstance(x, str)).all():
                continue

            try:
                # The whole column is parsed before assignment, so a single
                # unparsable value leaves the original column unchanged.
                # NOTE(review): json.loads also accepts bare scalars, so a
                # column of purely numeric strings would be converted to numbers.
                df[col] = values.apply(json.loads)
            except (json.JSONDecodeError, TypeError):
                # Not a JSON column; keep the raw strings.
                pass

        # Store the DataFrame with table name + '_df'
        dataframes[f"{table}_df"] = df

    return dataframes

# Fast path: reuse the pickled dictionary of DataFrames when it exists.
if os.path.exists(data_path + 'dfs_dict.pkl'):
    cell_has_run = True
    # Load the dictionary of DataFrames from the pickle.
    # NOTE(review): pickle.load can execute arbitrary code; only load cache
    # files that were produced locally by this project.
    with open(data_path + 'dfs_dict.pkl', 'rb') as f:
        dfs = pickle.load(f)
# Check if the flag variable exists in the global scope so that this code does not run twice
if 'cell_has_run' not in globals():
    path = data_path + "melee_player_database.db"

    # sqlite3.connect() silently creates an empty database when the file is
    # missing, which would only fail later with a confusing KeyError.
    if not os.path.exists(path):
        raise FileNotFoundError(f"SQLite database not found: {path}")

    # Connect to the database
    conn = sqlite3.connect(path)
    try:
        # Convert each table into a DataFrame
        dfs = load_tables_to_dfs(conn)
    finally:
        # Close the connection even if loading a table fails
        conn.close()

    # Now, you have a dictionary 'dfs' where each key is the table name with '_df' suffix and value is the corresponding DataFrame.
    # For example, to access the DataFrame for a table called 'players':
    # players_df = dfs['players_df']

    # 'start' and 'end' are converted from epoch seconds (unit='s') to timestamps.
    dfs['tournament_info_df']['start'] = pd.to_datetime(dfs['tournament_info_df']['start'], unit='s')
    dfs['tournament_info_df']['end'] = pd.to_datetime(dfs['tournament_info_df']['end'], unit='s')

    # Set the flag to indicate that the cell has been run
    cell_has_run = True

### Here we adjust the column data types of the DataFrames so that each column has the correct type. (This will be updated as needed.)

In [5]:
# Missing 'best_of' values are replaced with 0 so the column can be cast to int.
dfs['sets_df']['best_of'] = dfs['sets_df']['best_of'].fillna(0).astype(int) 

### Here we make dataframes that we will use and print the head.

The integers in `characters` count the number of games the player has played as each character. (We verify this for Zain below.)

In [None]:
# Bind the players table to a short name and preview its first rows.
players_df = dfs['players_df']
players_df.head()

In [None]:
# Bind the ranking table to a short name and preview its first rows.
ranking_df = dfs['ranking_df']
ranking_df.head()

In [None]:
# Bind the ranking-seasons table to a short name and preview its first rows.
ranking_seasons_df = dfs['ranking_seasons_df']
ranking_seasons_df.head()

In [None]:
# Bind the sets table to a short name and report how many sets carry
# per-game data (a non-empty 'game_data' entry).
sets_df = dfs['sets_df']
has_game_data = sets_df['game_data'].apply(lambda x: len(x) > 0)

# The '%' format already appends the percent sign, so the message must not
# repeat the word "percent" (the old text also ended in a stray ')').
print(f"{sets_df[has_game_data].shape[0] / sets_df.shape[0]:0.01%} of sets have some game data")

sets_df.head()

In [None]:
# Bind the tournament-info table to a short name and preview its first rows.
tournament_info_df = dfs['tournament_info_df']
tournament_info_df.head()

In [11]:
# Code optimization by Dan
# The goal is to avoid the per-tournament boolean filter
#
#      tournament_sets_df = sets_df[sets_df['tournament_key'] == tournament_key]
#
# by instead
# - attaching each tournament's start/end dates to ``sets_df``,
# - sorting the sets chronologically,
# - recording each tournament's first/last row position in a dictionary,
# - and slicing with sets_df.iloc[start:end+1].

sets_df = (
    sets_df
    .merge(
        tournament_info_df[['key', 'start', 'end']],
        left_on='tournament_key',
        right_on='key',
        how='left',
    )
    # Both frames have a 'key' column, so the merge suffixes them; keep the
    # set's own key under its original name and discard the tournament's.
    .drop(columns=['key_y'])
    .rename(columns={"key_x": "key"})
    # 'tournament_key' breaks ties between tournaments with the same end date.
    .sort_values(by=['end', 'tournament_key'])
)

In [12]:
# A bit of data cleanup: keep only sets whose tournament both starts and ends
# inside [min_date, max_date] (bounds inclusive).
# TODO: Rerun!
min_date = datetime.datetime(2015, 1, 1)
max_date = datetime.datetime(2024, 12, 31)

starts_in_range = sets_df['start'].between(min_date, max_date)
ends_in_range = sets_df['end'].between(min_date, max_date)
sets_df = sets_df[starts_in_range & ends_in_range]

In [None]:
# Preview a single row of the merged, date-filtered sets.
sets_df.head(1)

In [14]:
# Keep only the sets that carry at least one per-game record.
game_data_sets_df = sets_df[sets_df['game_data'].map(len) > 0]

In [None]:
# Every row of game_data_sets_df is expanded into several rows,
# one per entry of its 'game_data' list,
# with the ids and scores rewritten to describe that single game.

# The per-game rows do not need to carry the game data itself.
new_columns = game_data_sets_df.columns.tolist()
new_columns.remove('game_data')

# Primitive progress bar so we don't get bored waiting
counter = 0

def duplicate_row(x):
    """Expand a one-row group (a single set) into one row per recorded game.

    Player ids in the produced rows get the played character appended
    ('<id>/<char>'), and the scores are those of the individual game.
    Games with a missing winner or loser character are skipped.

    Args:
        x: Single-row DataFrame holding one set, including 'game_data'.

    Returns:
        DataFrame with the columns of ``x`` and one row per usable game.
    """
    global counter

    set_row = x.iloc[0]
    games = []

    for game_info in set_row['game_data']:
        # Invalid character data, just skip the game
        if game_info['winner_char'] is None or game_info['loser_char'] is None:
            continue

        game = set_row[new_columns].copy()

        # Trust the set-level winner rather than any player ids stored inside
        # 'game_data', on the off chance that those are invalid.
        p1_won = game['winner_id'] == game['p1_id']
        if p1_won:
            winner_id, loser_id = game['p1_id'], game['p2_id']
        else:
            winner_id, loser_id = game['p2_id'], game['p1_id']

        # Player ids now include the character that was played
        winner_pc = winner_id + '/' + game_info['winner_char']
        loser_pc = loser_id + '/' + game_info['loser_char']

        # Rewrite ids and scores so the row describes this specific game
        if p1_won:
            game['p1_id'], game['p1_score'] = winner_pc, game_info['winner_score']
            game['p2_id'], game['p2_score'] = loser_pc, game_info['loser_score']
        else:
            game['p1_id'], game['p1_score'] = loser_pc, game_info['loser_score']
            game['p2_id'], game['p2_score'] = winner_pc, game_info['winner_score']

        game['winner_id'] = winner_pc

        games.append(game)

    # Progress bar
    if counter % 10000 == 0:
        print(counter)

    counter += 1

    return pd.DataFrame(games, columns=x.columns)

# reset_index() provides an 'index' column to group on, giving one row per group.
# groupby().apply() is used because its function may return a DataFrame with a
# different number of rows than it was given.
expanded_groups = game_data_sets_df.reset_index().groupby('index').apply(duplicate_row, include_groups=False)
processed_game_data_sets = expanded_groups.reset_index(drop=True)

# Spot check: every game involving player/character '1021/melee/yoshi'
processed_game_data_sets[(processed_game_data_sets['p1_id'] == '1021/melee/yoshi') | (processed_game_data_sets['p2_id'] == '1021/melee/yoshi')]

In [None]:
# pd.to_pickle(processed_game_data_sets, data_path + 'individual_game_data.pkl')
# Reload the per-game rows from the cached pickle and order them by
# tournament end date.
individual_game_data_df = pd.read_pickle(data_path + 'individual_game_data.pkl').sort_values('end')
individual_game_data_df

In [16]:
# Because it would take an ABSURDLY long time to compute the ELOs of
# (specific_player)/(specific_character) \union (anyone)/(specific_opponent_character)
# we will instead approximate as follows:
# 1. compute the ELOs of (anyone)/(specific_opponent_character) (ex: fox vs fox, to gauge vaguely how good each fox player is)
# 2. Having those values fixed, compute the ELO of (specific_player)/(specific_character) (ex: aMSa/yoshi vs all fixed fox player ELOs)
#    in their matches against any such player from before.

# The character is the last '/'-separated component of each player/character id.
for id_column, char_column in (('winner_id', 'winner_char'), ('p1_id', 'p1_char'), ('p2_id', 'p2_char')):
    individual_game_data_df[char_column] = individual_game_data_df[id_column].apply(lambda x: x.split('/')[-1])

# Union of the characters seen on EITHER side of a game. (The previous version
# combined 'p1_char' with itself, so characters that only ever appeared as p2
# were dropped.) Sorted so that later per-character loops run in a stable order.
all_characters = sorted(set(individual_game_data_df['p1_char']) | set(individual_game_data_df['p2_char']))

In [None]:
# Dataframes containing only character vs same character

char_vs_char_dataframes = {}

# Some more data cleanup, although this should have been done earlier
# TODO: Do this earlier.
min_date = datetime.datetime(2015, 1, 1)
max_date = datetime.datetime(2024, 12, 31)

# To avoid absurd pollution of all the datasets,
# let's only consider players who have actually played more than a few games
# against another person with the same character
MIN_GAMES = 3

for character in all_characters:
    is_mirror_match = ((individual_game_data_df['p1_char'] == character) &
                       (individual_game_data_df['p2_char'] == character))
    char_vs_char_df = individual_game_data_df[is_mirror_match]

    # Keep only games whose tournament starts and ends inside the date window
    # (bounds inclusive).
    in_date_range = (char_vs_char_df['start'].between(min_date, max_date) &
                     char_vs_char_df['end'].between(min_date, max_date))
    char_vs_char_df = char_vs_char_df[in_date_range]

    # Count appearances on either side of a game
    game_players_series = pd.concat([char_vs_char_df['p1_id'], char_vs_char_df['p2_id']])
    total_games = game_players_series.value_counts()
    regular_players = total_games[total_games >= MIN_GAMES]

    # isin() is the vectorised equivalent of testing every id against the index
    both_regular = (char_vs_char_df['p1_id'].isin(regular_players.index) &
                    char_vs_char_df['p2_id'].isin(regular_players.index))

    # Reset the index only AFTER all filtering, so the stored frame really has
    # a contiguous 0..n-1 index (necessary for position calculations later).
    char_vs_char_dataframes[character] = char_vs_char_df[both_regular].reset_index(drop=True)

char_vs_char_dataframes['mrgameandwatch']

In [None]:
# Dictionary of dictionaries. First key is the specific character used for the matchup.
# For each character, maps the END of every weekly interval to the (first, last)
# row positions of that week's games in char_vs_char_dataframes[character].
# An empty week is stored as (0, -1), which yields an empty iloc[first:last+1] slice.
# This will be used for far more efficient processing later.
date_positions_by_char = {}

for character in all_characters:
    print(character)

    char_games_df = char_vs_char_dataframes[character]
    positions = {}

    # The positions must be computed on the SAME frame that is later sliced with
    # iloc. (They used to be computed from sets_df, whose rows do not correspond
    # to this character's games.) Resetting the index makes idxmax() return
    # row positions.
    end_dates = char_games_df['end'].reset_index(drop=True)

    initial_date = datetime.datetime(2015, 1, 1)
    interval = datetime.timedelta(weeks=1)

    end_date = end_dates.max()

    date = initial_date

    # Loop through and test for containment in the interval [date, date+interval)
    # NOTE(review): games in the final, partial week after the last full
    # interval are not assigned a position - confirm this is intended.
    while date + interval <= end_date:
        is_in_interval = (end_dates >= date) & (end_dates < date + interval)

        if not is_in_interval.any(): # all False
            positions[date + interval] = (0,-1)
        else: # At least one True
            # First True from the front, first True from the back.
            # NOTE(review): treating this as one contiguous range assumes the
            # games are ordered by 'end' - confirm.
            positions[date + interval] = (is_in_interval.idxmax(), is_in_interval[::-1].idxmax())

        date += interval

    date_positions_by_char[character] = positions

In [19]:
# TLDR the ELO computation occasionally seems to go negative, and I think this can cause a lot of problems.
# Likewise, the RD value seems to get unusually large occasionally
MIN_ELO = 500.0
MAX_RD = 350.0

def process_tournament(player_ratings_df, player_rds_df, games_df, positions, specific_date, key_is_date, glicko_objects):
    """Advance every player's rating by one rating period and record the result.

    Args:
        player_ratings_df: DataFrame with one column per player; receives the
            new ratings as a row. Its columns define the set of players.
        player_rds_df: DataFrame with the same layout; receives the new RDs.
        games_df: Games with 'p1_id', 'p2_id', 'winner_id' (and 'end' when
            ``key_is_date`` is False).
        positions: Maps a period key to ``(first, last)`` row positions in
            ``games_df``; ``(0, -1)`` selects no rows.
        specific_date: Key of the period to process.
        key_is_date: If True the written row is labelled ``specific_date``;
            otherwise it is labelled with the 'end' value of the period's first game.
        glicko_objects: Maps player id to a rating object offering getRating,
            getRd, setRating, setRd, update_player and did_not_compete.

    Returns:
        None. The rating objects and both DataFrames are modified in place.
    """
    # Get the games for this specific period, using precomputed indices for speed.
    first_pos, last_pos = positions[specific_date]
    period_df = games_df.iloc[first_pos:last_pos + 1]
    unique_players = list(player_ratings_df.columns)

    # Every game contributes twice: once from p1's point of view and once from p2's.
    df_p1 = period_df[['p1_id', 'p2_id', 'winner_id']].rename(
        columns={'p1_id': 'player_id', 'p2_id': 'opponent_id'})
    df_p2 = period_df[['p2_id', 'p1_id', 'winner_id']].rename(
        columns={'p2_id': 'player_id', 'p1_id': 'opponent_id'})

    outcomes_df = pd.concat([df_p1, df_p2], ignore_index=True)
    outcomes_df['outcome'] = (outcomes_df['winner_id'] == outcomes_df['player_id']).astype(int)
    outcomes_df = outcomes_df[['player_id', 'opponent_id', 'outcome']]

    players_with_games = set()

    if not outcomes_df.empty:
        # Snapshot the opponents' rating/RD BEFORE anyone is updated, so that
        # all updates in this period use the start-of-period values.
        outcomes_df['opponent_rating'] = outcomes_df['opponent_id'].apply(lambda x: glicko_objects[x].getRating())
        outcomes_df['opponent_rd'] = outcomes_df['opponent_id'].apply(lambda x: glicko_objects[x].getRd())

        # Compile the opponent data into lists (ratings, rds, outcomes) for every player
        grouped = outcomes_df.groupby('player_id').agg({
            'opponent_rating': list,
            'opponent_rd': list,
            'outcome': list
        }).reset_index()

        # First, handle the players that did play games in this period
        for player_id, opponent_ratings, opponent_rds, results in zip(grouped['player_id'],
                                                                       grouped['opponent_rating'],
                                                                       grouped['opponent_rd'],
                                                                       grouped['outcome']):
            glicko_objects[player_id].update_player(opponent_ratings, opponent_rds, results)
            players_with_games.add(player_id)

    # Now handle players that did NOT play games in this period.
    # (Set membership keeps this linear in the number of players.)
    for player in unique_players:
        if player not in players_with_games:
            glicko_objects[player].did_not_compete()

    # This might be a bug in the glicko2 library that we are using,
    # but occasionally we get weird values for the rating and/or RD
    # and this DOES very occasionally seem to cause the whole thing to crash
    # (division by zero somewhere), so clamp both values.
    for player in unique_players:
        glicko_object = glicko_objects[player]

        if glicko_object.getRating() < MIN_ELO:
            glicko_object.setRating(MIN_ELO)

        if glicko_object.getRd() > MAX_RD:
            glicko_object.setRd(MAX_RD)

    # Label of the row that receives the new values
    date = specific_date if key_is_date else games_df.iloc[positions[specific_date][0]]['end']

    # These are in the same order as the columns, as unique_players was taken from the columns.
    new_player_ratings = [glicko_objects[player].getRating() for player in unique_players]
    new_player_rds     = [glicko_objects[player].getRd()     for player in unique_players]

    player_ratings_df.loc[date] = new_player_ratings
    player_rds_df.loc[date]     = new_player_rds

In [None]:
# For every character, build two tables (ratings and RDs) with dates as the
# index and players as the columns, filled in one weekly period at a time.
player_ratings_char_vs_char = {}
player_rds_char_vs_char = {}

# Date of the initial (default-valued) row. Defined here instead of relying on
# the value leaked from the loop that builds date_positions_by_char.
# NOTE(review): must match the initial date used when building the positions.
initial_date = datetime.datetime(2015, 1, 1)

# Create the output directory once, up front
Path(data_path + 'char_vs_char/').mkdir(parents=True, exist_ok=True)

for character in all_characters:
    print(character)

    games_df = char_vs_char_dataframes[character]
    positions = date_positions_by_char[character]

    unique_players = list(set(list(games_df['p1_id'].unique()) + list(games_df['p2_id'].unique())))

    # Note that this technically starts at initial_date + interval, not initial_date
    dates = list(positions)

    # Convenient store of glicko objects
    glicko_objects = {player: Player() for player in unique_players}

    # Pre-allocate one row per date, filled with the default values.
    # A scalar is broadcast over the whole index; passing a single list-row
    # together with a longer index raises a shape-mismatch ValueError.
    player_ratings_df = pd.DataFrame(1500.0, index=[initial_date] + dates, columns=unique_players)
    player_rds_df = pd.DataFrame(350.0, index=[initial_date] + dates, columns=unique_players)

    # Loop over the individual date intervals above.
    for specific_date in tqdm.tqdm(dates, total=len(dates)):
        process_tournament(player_ratings_df, player_rds_df, games_df, positions, specific_date, True, glicko_objects)

    player_ratings_df.to_pickle(data_path + 'char_vs_char/' + character + '.pkl')
    #print('Size in memory: {0:.2f}'.format(sys.getsizeof(player_ratings_df) / (1024.0*1024.0)))

    player_ratings_char_vs_char[character] = player_ratings_df
    player_rds_char_vs_char[character] = player_rds_df

In [None]:
# 1021/melee/yoshi - aMSa
# 19554/melee/fox - Cody

# Only player/character combos with a reasonable amount of data are kept as
# the "player" side of a row.
MIN_GAMES = 10

game_players_series = pd.concat([individual_game_data_df['p1_id'], individual_game_data_df['p2_id']])
total_games = game_players_series.value_counts()
regular_players = total_games[total_games >= MIN_GAMES]

print("{0} regular player/char combos".format(len(regular_players.index)))

# Lots of memory usage. Let's just reduce down to what we need.
reduced_game_data_df = individual_game_data_df[['p1_id', 'p2_id', 'winner_id', 'p1_char', 'p2_char', 'winner_char', 'start', 'end']]

# Each game contributes up to two rows: one with p1 as 'player' and p2 as
# 'opponent', and one with the roles swapped.
perspective_frames = []
for player_side, opponent_side in (('p1', 'p2'), ('p2', 'p1')):
    is_regular = reduced_game_data_df[player_side + '_id'].isin(regular_players.index)
    perspective = reduced_game_data_df[is_regular].rename(columns={
        player_side + '_id': 'player_id', opponent_side + '_id': 'opponent_id',
        player_side + '_char': 'player_char', opponent_side + '_char': 'opponent_char'})
    perspective['outcome'] = (perspective['winner_id'] == perspective['player_id']).astype(int)
    perspective_frames.append(
        perspective[['player_id', 'opponent_id', 'player_char', 'opponent_char', 'outcome', 'start', 'end']])

reduced_game_data_df = pd.concat(perspective_frames, ignore_index=True)

# Save some memory - these are probably huge.
del perspective_frames
del perspective

In [None]:
# Display the player/opponent perspective table built above.
reduced_game_data_df

In [None]:
# Returns Rating, RD, and a bool for (actually found = True, default values = False)
def get_opponent_elo_rd(row):
    """Look up the opponent's same-character rating as of the tournament start.

    Args:
        row: A row with 'opponent_char', 'opponent_id' and 'start'.

    Returns:
        tuple: ``(rating, rd, found)``. When no rating can be looked up
        (unknown character or opponent, missing start date, or a start date
        older than the first rating row) the defaults
        ``(1500.0, 350.0, False)`` are returned.
    """
    default_result = (1500.0, 350.0, False)

    # See how opponent/char compares against (others)/char for the same char.
    # A character without a rating table yields the defaults instead of a KeyError.
    player_ratings = player_ratings_char_vs_char.get(row['opponent_char'])
    player_rds = player_rds_char_vs_char.get(row['opponent_char'])
    if player_ratings is None or player_rds is None:
        return default_result

    opponent_id = row['opponent_id']

    # Not in our main list of players
    if opponent_id not in player_ratings.columns:
        return default_result

    rating_dates = player_ratings.index

    # No usable date, or no rating rows at all
    if pd.isna(row['start']) or len(rating_dates) == 0:
        return default_result

    # No old enough data
    start_date = rating_dates[0]
    if start_date > row['start']:
        return default_result

    if len(rating_dates) < 2:
        # A single row: there is no spacing to measure, and only one choice.
        newest_index = 0
    else:
        # We can take advantage of the fact that the index of player_ratings
        # is always in regular intervals.
        interval = rating_dates[1] - rating_dates[0]

        # 'start' might be well beyond the dates we have data on;
        # in that case just use the newest piece of data.
        newest_index = min(int((row['start'] - start_date) / interval), len(rating_dates) - 1)

    # Select the column first so that no full row has to be materialised.
    return (player_ratings[opponent_id].iloc[newest_index],
            player_rds[opponent_id].iloc[newest_index],
            True)

# One (rating, rd, found) tuple per row
opponent_lookups = reduced_game_data_df.apply(get_opponent_elo_rd, axis=1)

# Unpack the tuples into three separate columns
reduced_game_data_df['opponent_rating'] = [lookup[0] for lookup in opponent_lookups]
reduced_game_data_df['opponent_rd']     = [lookup[1] for lookup in opponent_lookups]
reduced_game_data_df['opponent_found']  = [lookup[2] for lookup in opponent_lookups]

# The tuples themselves are no longer needed
del opponent_lookups

reduced_game_data_df

In [None]:
# Again more data cleanup. Should probably be somewhere more convenient.
# Keep only rows whose tournament starts and ends inside [min_date, max_date].
min_date = datetime.datetime(2015,1,1)
max_date = datetime.datetime(2024,12,31)

starts_in_range = reduced_game_data_df['start'].between(min_date, max_date)
ends_in_range = reduced_game_data_df['end'].between(min_date, max_date)
reduced_game_data_df = reduced_game_data_df[starts_in_range & ends_in_range]

reduced_game_data_df

In [25]:
# Compute the weekly interval number of every game, to group by later.
# The 'fox' rating table supplies the reference start date and the spacing
# between consecutive rating rows.
reference_dates = player_ratings_char_vs_char['fox'].index
start_date = reference_dates[0]
interval = reference_dates[1] - reference_dates[0]

# Work on an explicit copy to avoid the "copy of a slice" warning.
reduced_game_data_df = reduced_game_data_df.copy()
reduced_game_data_df['end_index'] = reduced_game_data_df['end'].map(
    lambda end: math.ceil((end - start_date) / interval))

In [27]:
# reduced_game_data_df.to_pickle(data_path + 'reduced_game_data.pkl')
# Replace the in-memory frame with the pickled copy.
# NOTE(review): with the save line above commented out, this discards whatever
# the preceding cells computed in favour of the file on disk - confirm intended.
reduced_game_data_df = pd.read_pickle(data_path + 'reduced_game_data.pkl')

In [29]:
# One row per (player incl. their character, opponent character, week index);
# the opponent ratings, opponent RDs and outcomes of that week's games are
# collected into lists.
group_keys = ['player_id', 'opponent_char', 'end_index']
list_columns = ['opponent_rating', 'opponent_rd', 'outcome']

grouped_df = (
    reduced_game_data_df[group_keys + list_columns]
    .groupby(group_keys)
    .agg({column: list for column in list_columns})
    .reset_index()
)

In [None]:
# Combined key '<player_id>/<opponent_char>'; the two source columns are then dropped.
grouped_df['player_char_char'] = grouped_df['player_id'] + '/' + grouped_df['opponent_char']
grouped_df.drop(columns=['player_id', 'opponent_char'], inplace=True)
grouped_df

In [None]:
# grouped_df.to_pickle(data_path + 'temp_file_until_better_name.pkl')
# Replace the in-memory frame with the pickled copy and display it.
# NOTE(review): with the save line above commented out, this discards the
# grouped_df built by the preceding cells - confirm intended.
grouped_df = pd.read_pickle(data_path + 'temp_file_until_better_name.pkl')
grouped_df

In [None]:
# To deal with include_groups=True being deprecated and disallowed soon,
# let's just create a copy of this column
grouped_df['pcc_duplicate'] = grouped_df['player_char_char']

player_char_char_elos = {}

# We will create a single table. Index is dates, columns is player/melee/pchar/ochar.
unique_players = list(grouped_df['player_char_char'].unique())

initial_date = datetime.datetime(2015, 1, 1)
end_date = datetime.datetime(2024, 12, 31) # TODO: Properly compute this instead of just guessing
interval = datetime.timedelta(weeks=1)

# Bugfix stuff
MIN_ELO = 500.0
MAX_RD = 350.0

# Week number -> date of that week boundary. 'end_index' is computed with
# ceil(), so the table must reach the first boundary at or after end_date;
# stopping at the last boundary <= end_date leaves the final partial week
# without a slot.
num_weeks = math.ceil((end_date - initial_date) / interval)
dates = {week: initial_date + week * interval for week in range(num_weeks + 1)}

# Convenient store of glicko objects
glicko_objects = {player: Player() for player in unique_players}

# Pre-allocating the dataframe for maximum efficiency.
# A scalar is broadcast over the whole index; passing a single list-row
# together with a longer index raises a shape-mismatch ValueError.
player_ratings_df = pd.DataFrame(1500.0, index=list(dates.values()), columns=unique_players)

def compute_pcc_elo(x):
    """Compute the weekly rating history of one player/char/opponent-char key.

    Args:
        x: The rows of ``grouped_df`` for a single 'player_char_char' value
           (one row per week in which games were played), including
           'pcc_duplicate' and 'end_index'.

    Returns:
        None. The matching column of ``player_ratings_df`` is overwritten.
    """
    # player/char/char
    pcc = x.iloc[0]['pcc_duplicate']

    # More easily allow for getting the week number
    x = x.set_index('end_index')

    glicko_object = glicko_objects[pcc]

    # Row position of every week that has games; -1 marks a week without games.
    weeknum_to_iloc = [-1]*len(dates)
    for row_position, week_number in enumerate(x.index):
        # Ignore weeks outside the table instead of failing (or, for negative
        # numbers, silently wrapping around to the end of the list).
        if 0 <= week_number < len(dates):
            weeknum_to_iloc[week_number] = row_position

    weekly_ratings = []

    for index in dates:
        row_position = weeknum_to_iloc[index]

        if row_position == -1:
            glicko_object.did_not_compete()
        else:
            week_row = x.iloc[row_position]
            glicko_object.update_player(week_row['opponent_rating'],
                                        week_row['opponent_rd'],
                                        week_row['outcome'])

        # Bugfix stuff
        if glicko_object.getRating() < MIN_ELO:
            glicko_object.setRating(MIN_ELO)

        if glicko_object.getRd() > MAX_RD:
            glicko_object.setRd(MAX_RD)

        weekly_ratings.append(glicko_object.getRating())

    # 'dates' and the DataFrame index are in the same order, so the whole
    # column can be written at once instead of one cell per week.
    player_ratings_df[pcc] = weekly_ratings

# TODO: Figure out how to include the group 
grouped_df.groupby('player_char_char').apply(compute_pcc_elo, include_groups=False)

In [None]:
# Spot check: the rating column for key '1021/melee/yoshi/fox'
# (columns are named player/melee/pchar/ochar).
player_ratings_df['1021/melee/yoshi/fox']

In [23]:
# Persist the player/char/opponent-char rating table under the data directory.
player_ratings_df.to_pickle(data_path + 'char_vs_char_player_rankings_weekly.pkl')