In [4]:
# From: https://stackoverflow.com/questions/8816729/javascript-equivalent-for-inverse-normal-function-eg-excels-normsinv-or-nor
# Converted from JS to Python

import math

# Series coefficients consumed by erf(); cof[0] is the constant term and the
# remaining entries are folded in from the last one down to cof[1].
cof = [
    -1.3026537197817094, 6.4196979235649026e-1, 1.9476473204185836e-2,
    -9.561514786808631e-3, -9.46595344482036e-4, 3.66839497852761e-4,
    4.2523324806907e-5, -2.0278578112534e-5, -1.624290004647e-6,
    1.30365583558e-6, 1.5626441722e-8, -8.5238095915e-8, 6.529054439e-9,
    5.059343495e-9, -9.91364156e-10, -2.27365122e-10, 9.6467911e-11,
    2.394038e-12, -6.886027e-12, 8.94487e-13, 3.13092e-13, -1.12708e-13,
    3.81e-16, 7.106e-15, -1.523e-15, -9.4e-17, 1.21e-16, -2.8e-17,
]


def erf(x):
    """Return the error function of ``x`` using the ``cof`` series.

    The series is evaluated for ``abs(x)``; the sign of ``x`` is applied at
    the end, so ``erf(-x) == -erf(x)``.
    """
    negative = x < 0
    magnitude = -x if negative else x

    # Map [0, inf) onto the series variable.
    t = 2 / (2 + magnitude)
    ty = 4 * t - 2

    # Clenshaw-style backward recurrence over cof[27] .. cof[1].
    current = 0
    previous = 0
    for coefficient in cof[:0:-1]:
        current, previous = ty * current - previous + coefficient, current

    # `complement` is 1 - erf(|x|).
    complement = t * math.exp(
        -magnitude * magnitude + 0.5 * (cof[0] + ty * current) - previous
    )
    if negative:
        return complement - 1
    return 1 - complement

def erfc(x):
    """Return the complementary error function, ``1 - erf(x)``.

    Uses the standard library's ``math.erfc`` instead of literally computing
    ``1 - erf(x)``: for large positive ``x`` the value of ``erf(x)`` rounds to
    1.0 and the subtraction collapses to exactly 0.0, losing the entire tail.
    ``math.erfc`` keeps full relative precision there, which is what the
    refinement steps in the inverse function depend on for small
    probabilities.
    """
    return math.erfc(x)

def erfcinv(p):
    """Approximate the inverse of the complementary error function at ``p``.

    Out-of-range inputs are mapped to sentinels rather than raising:
    ``p >= 2`` returns -100 and ``p <= 0`` returns 100. Otherwise a rational
    initial guess is polished with two correction steps against ``erfc``.
    """
    if p >= 2:
        return -100
    if p <= 0:
        return 100

    # Solve on the p < 1 side and mirror the answer for the other side.
    lower_side = p < 1
    pp = p if lower_side else 2 - p

    # Rational initial guess.
    t = math.sqrt(-2 * math.log(pp / 2))
    ratio = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481))
    x = -0.70711 * (ratio - t)

    # Two refinement passes; 1.12837916709551257 is 2 / sqrt(pi).
    for _ in range(2):
        residual = erfc(x) - pp
        x += residual / (1.12837916709551257 * math.exp(-x * x) - x * residual)

    if lower_side:
        return x
    return -x

def inv(p, mean, std):
    """Return the value at cumulative probability ``p`` for a normal
    distribution with the given ``mean`` and ``std`` (like Excel's NORMINV).

    Computed as ``mean - sqrt(2) * std * erfcinv(2 * p)``.
    """
    negative_sqrt_two = -1.41421356237309505
    return negative_sqrt_two * std * erfcinv(2 * p) + mean


In [3]:
import random

def projectedScore(x, lookupMapStats):
    """Draw one simulated score for team ``x``.

    ``lookupMapStats[x]`` must provide ``'pointsFor_mean'`` and
    ``'pointsFor_std'``; a uniform random number is pushed through ``inv``
    with those two values.
    """
    # 2. Use NormInv to Calculate One Simulation for each Matchup
    uniform_draw = random.random()
    team_stats = lookupMapStats[x]
    return inv(uniform_draw, team_stats['pointsFor_mean'], team_stats['pointsFor_std'])

def simulation(run, upcoming_matches, completed_matches, num_playoff_teams, lookupMapStats):
    """Simulate the remaining schedule once and summarise every team's season.

    Args:
        run: Value written to the ``run`` column of the result.
        upcoming_matches: DataFrame of unplayed matches with at least the
            columns displayName, userId, matchupId, week and pointsFor.
            Assumes the two teams of a matchup sit on adjacent rows (the
            home/away split below takes alternating rows) - confirm with the
            caller's sort order.
        completed_matches: DataFrame of played matches with displayName,
            week, pointsFor and pointsAgainst; stacked ahead of the
            simulated rows.
        num_playoff_teams: Number of top-ranked teams flagged as playoff
            teams (same slicing semantics as ``DataFrame.head``).
        lookupMapStats: Mapping of displayName to a dict with
            ``'pointsFor_mean'`` and ``'pointsFor_std'``.

    Returns:
        DataFrame with one row per displayName holding ``outcome`` (total
        wins), ``pointsFor`` (total points), ``isPlayoffTeam`` (1.0 or 0.0),
        one ``W<week>`` column per week with that week's outcome, and ``run``.
    """
    matches_copy = upcoming_matches[['displayName', 'userId', 'matchupId', 'week', 'pointsFor']].copy() # copy dataframe takes up extra memory...
    # 2. Use NormInv to calculate one simulated score per team per upcoming match.
    matches_copy['pointsFor'] = upcoming_matches['displayName'].apply(lambda x: projectedScore(x, lookupMapStats))

    # Alternating rows are the two sides of each matchup.
    home_matchups = matches_copy[::2]
    away_matchups = matches_copy[1::2]

    # Attach the opponent's simulated score to each side as pointsAgainst.
    merged_home = home_matchups.merge(away_matchups[['matchupId', 'week', 'pointsFor']], how='inner', on=['matchupId', 'week'], suffixes=('', '_right'))
    merged_home = merged_home.rename(columns={"pointsFor_right": "pointsAgainst"})

    merged_away = away_matchups.merge(home_matchups[['matchupId', 'week', 'pointsFor']], how='inner', on=['matchupId', 'week'], suffixes=('', '_right'))
    merged_away = merged_away.rename(columns={"pointsFor_right": "pointsAgainst"})

    merged = pd.concat([merged_home, merged_away]).sort_values(by=['week', 'matchupId'])

    all_matches = pd.concat([completed_matches, merged])
    # save all_matches to csv for validation that predictions were accurate...
    # A win is strictly more points than the opponent; a tie counts as 0 for both.
    all_matches['outcome'] = (all_matches['pointsFor'] > all_matches['pointsAgainst']).astype(int)
    # 3. Figure Out Records of Each Team Post 1 Simulation

    # PLAYOFF LEVERAGE
    leverage_df = all_matches.pivot_table(index=['displayName'], columns=['week'], values='outcome').add_prefix('W')

    outcomes = all_matches.groupby('displayName').agg({'outcome': 'sum', 'pointsFor': 'sum'})
    standings = outcomes.sort_values(by=['outcome', 'pointsFor'], ascending=False)

    # Flag the top seeds by position in the sorted standings. Joining a
    # "playoff" frame back on the outcome/pointsFor values (as a merge without
    # `on` does) duplicates rows whenever two teams share the same record and
    # points, so the flag is assigned positionally instead.
    is_playoff = pd.Series(0.0, index=standings.index)
    is_playoff.iloc[:num_playoff_teams] = 1.0
    predictions = standings.assign(isPlayoffTeam=is_playoff)

    # Merge Playoff Leverage Table
    predictions = predictions.merge(leverage_df, on='displayName', how='outer')

    predictions['run'] = run

    return predictions

    

In [1]:
# simulation driver
# Run 1: Time taken: 52195.011ms ~ todo reduce the time taken...
import time
import pandas as pd

def createSimulation(leagueId, week, num_playoff_teams, runs, leverage_weeks=(11, 12, 13, 14)):
    """Run ``runs`` simulations of the rest of a league's season.

    Reads ``<leagueId>.csv`` from the working directory, treats rows with
    ``week <= week`` as completed, derives each team's pointsFor mean and
    standard deviation from them, and simulates the remaining rows ``runs``
    times. Prints the per-team statistics and the elapsed simulation time.

    Args:
        leagueId: Identifier used to build the CSV file name.
        week: Last completed week.
        num_playoff_teams: Number of teams flagged as playoff teams per run.
        runs: Number of simulated seasons.
        leverage_weeks: Weeks whose ``W<week>`` outcome columns are kept in
            the result. Defaults to weeks 11-14.

    Returns:
        DataFrame with the rows of every run stacked, holding the columns
        run, outcome, pointsFor, isPlayoffTeam and one ``W<week>`` column per
        entry of ``leverage_weeks``. An empty frame with those columns is
        returned when ``runs`` is 0. A team's playoff probability can be
        derived afterwards as
        ``groupby('displayName')['isPlayoffTeam'].sum() / runs * 100``.
    """
    matches = pd.read_csv(f'{leagueId}.csv')
    matches = matches[['displayName', 'userId', 'matchupId', 'week', 'pointsFor', 'pointsAgainst', 'outcome']].sort_values(by=['week', 'matchupId'])

    is_completed = matches['week'] <= week
    completed_matches = matches[is_completed]
    upcoming_matches = matches[~is_completed]

    # 1. Find the Mean and Standard Deviation
    team_stats = completed_matches.groupby('displayName').agg({'pointsFor': ['mean', 'std']})
    # Flatten the (column, statistic) pairs into names like 'pointsFor_mean'.
    team_stats.columns = [f"{column}_{stat}" for column, stat in team_stats.columns]
    lookupMapStats = team_stats.to_dict('index')

    kept_columns = ['run', 'outcome', 'pointsFor', 'isPlayoffTeam'] + [f'W{w}' for w in leverage_weeks]

    start = time.perf_counter()
    # Collect every run first and concatenate once; growing a DataFrame with
    # concat inside the loop re-copies all earlier rows on every iteration.
    run_frames = [
        simulation(run, upcoming_matches, completed_matches, num_playoff_teams, lookupMapStats)[kept_columns]
        for run in range(runs)
    ]
    simulation_df = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=kept_columns)
    end = time.perf_counter()

    print(lookupMapStats)
    print(f"{leagueId} - {runs} Iterations: {runs}\tTime taken: {(end-start)*10**3:.03f}ms")
    return simulation_df



In [5]:
import pandas as pd

# Last week with final results: createSimulation treats rows with
# week <= this value as completed and simulates everything after it.
LAST_COMPLETED_WEEK = 10
# Number of top-ranked teams flagged as playoff teams in each simulated season.
PLAYOFF_TEAMS = 6
# Number of simulated seasons. The commented value is presumably the full-size
# setting and 1 a quick smoke run.
# NOTE(review): the saved output of the later leverage-split cell shows
# per-row run counts in the hundreds, so it was produced with a larger RUNS
# than the one set here - confirm before trusting that table.
# RUNS = 10000
RUNS = 1
# League identifiers; createSimulation reads "<leagueId>.csv" for the one passed in.
ATL_LEAGUE_ID = 784961395996356608
TCAN_LEAGUE_ID = 849473673709629440
DYNASTY_LEAGUE_ID = 870520789705854976

# Stacked per-run simulation rows for the ATL league; later cells read `results`.
results = createSimulation(ATL_LEAGUE_ID, LAST_COMPLETED_WEEK, PLAYOFF_TEAMS, RUNS)

{'BuzzBoy': {'pointsFor_mean': 100.40599999999999, 'pointsFor_std': 16.232976176769174}, 'DrUn1ucky': {'pointsFor_mean': 112.734, 'pointsFor_std': 20.460266208760174}, 'Shahil': {'pointsFor_mean': 93.264, 'pointsFor_std': 24.962910798934395}, 'adv1996': {'pointsFor_mean': 107.65599999999999, 'pointsFor_std': 16.967217541810182}, 'cmendpara': {'pointsFor_mean': 116.59200000000001, 'pointsFor_std': 26.34107261799843}, 'derrickbhole': {'pointsFor_mean': 103.13399999999999, 'pointsFor_std': 15.034233675921836}, 'east2wes': {'pointsFor_mean': 117.36800000000001, 'pointsFor_std': 16.54261352453784}, 'jbick3': {'pointsFor_mean': 109.25399999999999, 'pointsFor_std': 23.85340422944561}, 'neelpatil': {'pointsFor_mean': 105.478, 'pointsFor_std': 17.10075099065665}, 'nigeluno02': {'pointsFor_mean': 104.506, 'pointsFor_std': 25.94416406224893}, 'pranav': {'pointsFor_mean': 108.856, 'pointsFor_std': 11.44770059395724}, 'psmith999': {'pointsFor_mean': 100.75399999999999, 'pointsFor_std': 19.663536587

In [116]:
# Playoff leverage: for every team and every win/loss pattern over weeks 11-14,
# count the simulated seasons with that pattern and how many reached the playoffs.
splits = (
    results
    .groupby(['displayName', 'W11', 'W12', 'W13', 'W14'])
    .agg({'isPlayoffTeam': 'sum', 'run': 'count'})
)

# Share of those seasons that made the playoffs, as a percentage with 2 decimals.
splits['prob'] = splits['isPlayoffTeam'].div(splits['run']).mul(100).round(2)

splits


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,isPlayoffTeam,run,prob
displayName,W11,W12,W13,W14,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BuzzBoy,0,0,0,0,0.0,1326,0.0
BuzzBoy,0,0,0,1,0.0,923,0.0
BuzzBoy,0,0,1,0,0.0,1031,0.0
BuzzBoy,0,0,1,1,0.0,741,0.0
BuzzBoy,0,1,0,0,0.0,800,0.0
BuzzBoy,0,1,0,1,0.0,580,0.0
BuzzBoy,0,1,1,0,0.0,664,0.0
BuzzBoy,0,1,1,1,1.0,467,0.21
BuzzBoy,1,0,0,0,0.0,749,0.0
BuzzBoy,1,0,0,1,0.0,472,0.0
