In [2]:


import gc
import os
import faker
import random

import numpy as np
import pandas as pd

from copy import copy


In [6]:

def create_fake_Ridge_ratings(num_players, mean=0, std_dev=3):
    
    """Create fake Ridge ratings for a given number of players."""

    fake = faker.Faker()
    rtg_dict = {}
    ratings = np.random.normal(mean, std_dev, num_players)
    for i in range(num_players):
        rtg_dict[fake.name()] = ratings[i]

    return rtg_dict

def create_Ridge_random_walk(player_rating_dict, num_rounds=250, step_size=0.1, matchup_std_dev=8, stat='scoring'):
    
    """Create a random walk of Ridge ratings, given some ratings."""

    assert(num_rounds>=1)
    names = list(player_rating_dict.keys())

    data_random_walk = []
    for rnd in range(1, num_rounds+1):
        rnd_set = set(copy(names))
        while len(rnd_set)>=2:
            player_a = random.choice(list(rnd_set))
            rnd_set.remove(player_a)
            player_b = random.choice(list(rnd_set))
            rnd_set.remove(player_b)
            data_random_walk.append([player_a, player_b, rnd, player_rating_dict[player_a], player_rating_dict[player_b]])
        
        for player in names:
            nudge = step_size*2*(np.random.random()-0.5)
            rtg  = player_rating_dict[player]
            
            
            rtg = np.max([1, rtg+nudge])
            player_rating_dict[player] = rtg
            
    data_random_walk = pd.DataFrame(data_random_walk, columns=['player_a','player_b','rating_period', 'player_a_true','player_b_true'])
    data_random_walk = data_random_walk.sort_values(by=['rating_period','player_a']).reset_index(drop=True)
    data_random_walk['true_matchup_mean'] = data_random_walk.apply(lambda x: x.player_a_true - x.player_b_true, axis=1)
    data_random_walk['true_matchup_std_dev'] = matchup_std_dev
    data_random_walk['result'] = data_random_walk.apply(lambda x: np.random.normal(x.true_matchup_mean, x.true_matchup_std_dev), axis=1)

    ## need reverse
    reverse = data_random_walk.copy()

    reverse = reverse.rename(columns={
        'player_a':'player_b',
        'player_b':'player_a',
        'player_a_true':'player_b_true',
        'player_b_true':'player_a_true'
    })

    reverse['true_matchup_mean'] = -1*reverse['true_matchup_mean'].copy()
    reverse['result'] = -1*reverse['result'].copy()

    data_random_walk = pd.concat([data_random_walk, reverse], axis=0).sort_values(by=['rating_period','player_a']).reset_index(drop=True)
    data_random_walk['rating_period'] = data_random_walk['rating_period'].astype(int)

    return data_random_walk

player_ratings = create_fake_Ridge_ratings(24)
ridge_random_walk = create_Ridge_random_walk(player_ratings)

ridge_random_walk



Unnamed: 0,player_a,player_b,rating_period,player_a_true,player_b_true,true_matchup_mean,true_matchup_std_dev,result
0,Andrew Hansen,Raymond Palmer,1,-1.520256,-5.372972,3.852716,8,20.447001
1,Angie Smith,Nathan Fischer,1,3.664099,-0.798431,4.462530,8,3.198665
2,Cory Macias,Lindsey Collins,1,3.120270,1.865431,1.254838,8,0.697291
3,David Baker,John Wilson,1,0.012298,-0.606203,0.618501,8,0.856331
4,Eric Jones,Melissa Ramirez,1,5.623578,-0.713908,6.337486,8,-9.587149
...,...,...,...,...,...,...,...,...
5995,Susan Ortiz,David Baker,250,1.570815,2.845764,-1.274949,8,-16.182256
5996,Todd Herrera,Raymond Palmer,250,1.805033,2.408376,-0.603343,8,-12.666388
5997,Tyler Armstrong,Angie Smith,250,1.093399,4.306141,-3.212742,8,-4.204378
5998,William Cole,Joseph Stokes,250,1.100385,1.369763,-0.269378,8,-18.523331
