In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
import time


# Let pandas render up to 100 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 100)

In [2]:
# Load the per-leave summary table. The CSV's first column has no header, so
# pandas names it 'Unnamed: 0'; it is renamed to 'leave' and used as the index.
summary_df = (
    pd.read_csv('leave_summary_20200508.csv')
    .rename(columns={'Unnamed: 0': 'leave'})
    .set_index('leave')
)

In [3]:
# Build {leave: value} lookups while 'leave' is still the index of summary_df.
count_dict, equity_dict, mean_equity_dict = (
    summary_df[column].to_dict()
    for column in ('count', 'equity', 'mean_equity')
)

# Turn 'leave' back into an ordinary column for the filtering done further down.
summary_df = summary_df.reset_index()

In [4]:
# Number of characters (tiles) in each leave string.
summary_df['leave_len'] = summary_df['leave'].map(len)

In [5]:
# `leaves` is indexed by leave length further down (leaves[1] .. leaves[6]);
# each entry is iterated over and used for membership tests.
# NOTE(review): unpickling can execute arbitrary code, so only load an
# 'all_leaves.p' file that was produced by a trusted source.
with open('all_leaves.p', 'rb') as leaves_file:  # context manager closes the handle
    leaves = pkl.load(leaves_file)

# Canonical tile order used when sorting the letters of a leave: '?' comes
# first, followed by A-Z.
alphabetical_key = '?ABCDEFGHIJKLMNOPQRSTUVWXYZ'


def sort_func(x):
    """Return the position of tile `x` in `alphabetical_key`.

    Intended as a `key=` function for `sorted`. Raises ValueError when `x`
    does not occur in `alphabetical_key`.
    """
    return alphabetical_key.index(x)

In [6]:
def _add_each_tile(leave):
    """Return the 27 leaves made by adding one tile of `alphabetical_key` to `leave`.

    Each result has its characters re-sorted with `sort_func`.
    """
    return [''.join(sorted(leave + tile, key=sort_func)) for tile in alphabetical_key]


# Map every leave of length 1-5 to its one-tile-longer children.
child_leaves = {
    leave: _add_each_tile(leave)
    for size in range(1, 6)
    for leave in leaves[size]
}
# The children of the empty leave are simply the single tiles.
child_leaves[''] = list(alphabetical_key)

In [7]:
def get_neighboring_leaves(original_leave):
    """Return the leaves reachable from `original_leave` by swapping one tile.

    Every position of `original_leave` is removed in turn, and the children of
    the shortened leave are looked up in `child_leaves`. A candidate is kept
    only if it has the same number of '?' characters as `original_leave` and
    is present in `leaves[len(original_leave)]`.

    Duplicates are not removed: a candidate that is reachable through several
    removed positions appears once for each of them.

    Parameters
    ----------
    original_leave : str
        Leave whose neighbours are wanted.

    Returns
    -------
    list of str
        Neighbouring leaves in discovery order; empty for an empty input.
    """
    blank_total = original_leave.count('?')
    rack_size = len(original_leave)

    # Collect the children of every "one tile removed" version of the leave.
    candidates = []
    for drop_at in range(rack_size):
        shortened = original_leave[:drop_at] + original_leave[drop_at + 1:]
        candidates.extend(child_leaves[shortened])

    # Keep candidates with an unchanged blank count that are known racks.
    return [
        candidate
        for candidate in candidates
        if candidate.count('?') == blank_total and candidate in leaves[rack_size]
    ]

In [8]:
def calculate_smoothed_superleave(superleave):
    """Return the pooled mean equity over the neighbours of `superleave`.

    The neighbours come from `get_neighboring_leaves(superleave)`. Their
    `equity_dict` values and `count_dict` values are summed separately and the
    result is total equity / total count, so each neighbour is weighted by its
    count. A leave listed more than once contributes once per listing.

    Parameters
    ----------
    superleave : str
        Leave whose smoothed value is wanted.

    Returns
    -------
    float
        Pooled mean equity, or NaN when the neighbours have a combined count
        of zero (which includes the case of no neighbours at all).

    Raises
    ------
    KeyError
        If a neighbour is missing from `equity_dict` or `count_dict`.
    """
    neighbors = get_neighboring_leaves(superleave)

    neighboring_equity = 0
    neighboring_count = 0
    for neighbor_leave in neighbors:
        neighboring_equity += equity_dict[neighbor_leave]
        neighboring_count += count_dict[neighbor_leave]

    # No observations among the neighbours: there is nothing to average, so
    # report NaN instead of failing on a division by zero.
    if neighboring_count == 0:
        return float('nan')

    return neighboring_equity / neighboring_count


3,820 superleaves of length 6 were never observed at all.

In [9]:
# 'ev' values of the length-6 leaves only.
six_tile_ev = summary_df.loc[summary_df['leave_len'] == 6, 'ev']

# Vectorised masks replace a row-by-row apply over every length-6 leave.
# The name is dropped so the printed Series matches what the apply produced.
# How many length-6 leaves have an ev at all?
print(six_tile_ev.notnull().rename(None).value_counts())
# How many have an ev that is both present and below 10? (NaN < 10 is False.)
print((six_tile_ev.notnull() & (six_tile_ev < 10)).rename(None).value_counts())

True     733491
False      3820
dtype: int64
True     643335
False     93976
dtype: int64


In [10]:
# The 50 length-5 leaves with the highest ev.
five_tile_rows = summary_df.loc[summary_df['leave_len'] == 5]
five_tile_rows.sort_values('ev', ascending=False).head(50)

Unnamed: 0,leave,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len
167808,??LQX,2917,15182.514,103,2,28.320388,147.403049,1.941748,0.011261,-10.751291,106.315359,5
109471,??DQX,3218,15198.186,110,0,29.254545,138.165327,0.0,0.012027,-9.817134,97.077638,5
108048,??DMZ,12534,44523.936,382,61,32.811518,116.554806,15.968586,0.041766,-6.260161,75.467117,5
169820,??MOZ,28258,64857.103,585,184,48.304274,110.866843,31.452991,0.063961,9.232594,69.779154,5
78274,??CCV,3659,17857.948,167,21,21.91018,106.93382,12.57485,0.018259,-17.1615,65.846131,5
87025,??CHK,9801,29445.381,277,70,35.382671,106.301014,25.270758,0.030286,-3.689008,65.213325,5
123214,??ESZ,94252,101743.515,985,765,95.68731,103.292909,77.664975,0.107695,56.61563,62.205219,5
108972,??DOZ,48383,84760.445,827,318,58.504232,102.491469,38.452237,0.09042,19.432553,61.40378,5
118409,??EIZ,250647,270260.787,2662,2148,94.1574,101.525465,80.69121,0.291049,55.085721,60.437776,5
107424,??DLX,18761,35242.078,353,116,53.147309,99.835915,32.86119,0.038595,14.075629,58.748226,5


In [11]:
# Initialise the smoothed column from the raw ev values.
summary_df['smoothed_ev'] = summary_df['ev']

# (points - equity) divided by the observation count of each leave.
points_minus_equity = summary_df['points'].sub(summary_df['equity'])
summary_df['point_equity_diff'] = points_minus_equity.div(summary_df['count'])

In [12]:
# The 50 leaves with the smallest (most negative) point_equity_diff.
summary_df.sort_values(by='point_equity_diff').head(50)

Unnamed: 0,leave,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len,smoothed_ev,point_equity_diff
618171,??DLQX,303,12408.544,46,0,6.586957,269.750957,0.0,0.005029,-32.484723,228.663267,6,228.663267,-263.164
559834,??DDQX,26,815.492,3,0,8.666667,271.830667,0.0,0.000328,-30.405013,230.742978,6,230.742978,-263.164
879005,??LLQX,107,1215.027,7,0,15.285714,173.575286,0.0,0.000765,-23.785965,132.487597,6,132.487597,-158.289571
886010,??LPQX,48,976.316,6,0,8.0,162.719333,0.0,0.000656,-31.071679,121.631644,6,121.631644,-154.719333
887144,??LQXY,74,351.173,2,0,37.0,175.5865,0.0,0.000219,-2.071679,134.498811,6,134.498811,-138.5865
886915,??LQSX,56,324.656,2,0,28.0,162.328,0.0,0.000219,-11.071679,121.240311,6,121.240311,-134.328
620183,??DMOZ,2583,32787.245,233,3,11.085837,140.71779,1.287554,0.025475,-27.985842,99.630101,6,99.630101,-129.631953
457258,??CCJV,10,138.164,1,0,10.0,138.164,0.0,0.000109,-29.071679,97.076311,6,97.076311,-128.164
459910,??CCNV,987,14439.933,107,0,9.224299,134.952645,0.0,0.011699,-29.84738,93.864956,6,93.864956,-125.728346
616124,??DLLX,281,1382.806,9,1,31.222222,153.645111,11.111111,0.000984,-7.849457,112.557422,6,112.557422,-122.422889


In [13]:
# Leaves selected for smoothing: length-5 leaves with count < 828 and length-6
# leaves with count < 234. The running total is printed after each length.
leaves_to_smooth = []
for tile_total, count_cutoff in ((5, 828), (6, 234)):
    is_sparse = (summary_df['leave_len'] == tile_total) & (summary_df['count'] < count_cutoff)
    leaves_to_smooth += list(summary_df.loc[is_sparse, 'leave'].values)
    print(len(leaves_to_smooth))

37022
472533


"ev" is defined as the average equity of a superleave minus the average equity over all plays in a run of simulated games. That overall average is about 41 points.

In [14]:
# Recover the overall average equity from the '??' row: its mean_equity minus
# its ev.
double_blank = summary_df.loc[summary_df['leave'] == '??', ['mean_equity', 'ev']]
mean_equity = double_blank['mean_equity'].values[0] - double_blank['ev'].values[0]

In [15]:
# Index by leave again so the ev column converts into a {leave: ev} mapping.
# The smoothing loop overwrites entries of this mapping.
summary_df = summary_df.set_index('leave')
ev_by_leave = summary_df['ev']
smooth_ev_dict = ev_by_leave.to_dict()

In [None]:
# Replace the ev of every selected leave with its smoothed value: the pooled
# neighbour equity minus the overall average equity.
t0 = time.time()

for i, leave in enumerate(leaves_to_smooth):
    # Progress report (index and elapsed seconds) every 1000 leaves.
    if not i % 1000:
        print(i, time.time() - t0)

    pooled_equity = calculate_smoothed_superleave(leave)
    smooth_ev_dict[leave] = pooled_equity - mean_equity

0 6.914138793945312e-05
1000 49.34316921234131
2000 117.55532908439636
3000 178.4248161315918


In [None]:
# Write the {leave: value} mapping to CSV, one row per leave.
smoothed_values = pd.Series(smooth_ev_dict)
smoothed_values.to_csv('leave_values_20200510_smoothed.csv')