In [1]:
from itertools import combinations
import numpy as np
import pandas as pd
import seaborn as sns
from string import ascii_uppercase
import time as time

%matplotlib inline

Create a dictionary of all possible 1- to 5-tile leaves, keyed by leave length

In [2]:
# Tile bag as a flat list with one entry per physical tile (100 in total).
# '?' presumably stands for the blank tile -- confirm against the game logs.
tilebag = [
    tile
    for tile, count in (
        ('A', 9), ('B', 2), ('C', 2), ('D', 4), ('E', 12),
        ('F', 2), ('G', 3), ('H', 2), ('I', 9), ('J', 1),
        ('K', 1), ('L', 4), ('M', 2), ('N', 6), ('O', 8),
        ('P', 2), ('Q', 1), ('R', 6), ('S', 4), ('T', 6),
        ('U', 4), ('V', 2), ('W', 2), ('X', 1), ('Y', 2),
        ('Z', 1), ('?', 2),
    )
    for _ in range(count)
]
            
# leaves_1 = ascii_uppercase + '?'
# leaves_2 = sorted(list(set(list(combinations(tilebag,2)))))
# leaves_2 = [x[0]+x[1] for x in leaves_2]

In [3]:
t0 = time.time()

# Longest leave to enumerate. Keep this small: the number of raw tile
# combinations grows very quickly with length (the notebook notes that 6
# breaks on the author's machine).
maximum_superleave_length = 5

# leaves[length] is the sorted list of distinct leaves of that length, each
# written as a string. The combinations iterator is fed straight into a set
# comprehension, so duplicates are dropped on the fly instead of first
# materialising every raw combination in a list.
# combinations() keeps tilebag order, so the letters inside each leave follow
# that order (A-Z, then '?').
leaves = {
    length: sorted({''.join(combo) for combo in combinations(tilebag, length)})
    for length in range(1, maximum_superleave_length + 1)
}

t1 = time.time()
print('Calculated superleaves up to length {} in {} seconds'.format(
    maximum_superleave_length,t1-t0))

Calculated superleaves up to length 5 in 19.01953411102295 seconds


The cell above creates the full set of leaves for every length from 1 to 5 (length 6 breaks on my local machine). The number of distinct leaves per length is printed below.

In [4]:
# Report how many distinct leaves exist for each length. Iterating over the
# dictionary itself keeps this in step with maximum_superleave_length instead
# of relying on a hard-coded range (which raises KeyError if the maximum is
# lowered).
for length, leaves_of_length in leaves.items():
    print(length, len(leaves_of_length))

1 27
2 373
3 3509
4 25254
5 148150


In [5]:
# Load the logged plays. The file has no header row, so pandas numbers the
# columns; the first three are given readable names here.
# keep_default_na=False stops pandas from turning strings such as 'NA' into
# NaN (presumably because such strings can be legitimate racks).
column_dict = {0: 'rack', 1: 'score', 2: 'tiles_remaining'}
df = pd.read_csv(
    'log_games.csv', header=None, keep_default_na=False
).rename(columns=column_dict)

In [6]:
# Keep only the rows whose tiles_remaining value is at least tile_limit.
tile_limit = 1
# .copy() gives df its own data, so the column assignments in later cells
# modify this frame directly rather than a slice of the unfiltered one
# (which is what triggers pandas' SettingWithCopyWarning).
df = df.loc[df['tiles_remaining'] >= tile_limit].copy()

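Alphabetize each rack, so that repeated tiles end up next to each other (the duplicate checks below rely on simple substring matching)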

In [7]:
# Rewrite every rack with its tiles in sorted order, so equal tiles are adjacent.
df['rack'] = df['rack'].map(lambda rack: ''.join(sorted(rack)))

Create flag of whether each letter is contained in a rack, and also check for multiples

In [10]:
# Add one boolean column per repeated-tile pattern ('A', 'AA', 'AAA', ...),
# marking the racks that hold at least that many copies of the tile.
# The loop runs over the lengths actually present in `leaves`, so it stays
# consistent with maximum_superleave_length.
for multiple in sorted(leaves):
    t0 = time.time()

    # iterate through all 27 tiles
    for c in leaves[1]:
        pattern = multiple * c

        # Skip multiples the tile bag cannot supply (they never appear as a leave).
        if pattern in leaves[multiple]:
            # Racks were alphabetized earlier, so copies of a tile are adjacent
            # and a plain substring test detects them. The vectorized
            # str.contains replaces a per-row Python lambda; regex=False is
            # required because '?' would otherwise be read as a regex
            # metacharacter.
            df[pattern] = df['rack'].str.contains(pattern, regex=False)

    t1 = time.time()
    print('Added columns for all duplicates up to length {} in {} seconds'.format(multiple,t1-t0))

Added columns for all duplicates up to length 1 in 135.2876591682434 seconds
Added columns for all duplicates up to length 2 in 124.7717490196228 seconds
Added columns for all duplicates up to length 3 in 69.06989097595215 seconds
Added columns for all duplicates up to length 4 in 63.6707079410553 seconds
Added columns for all duplicates up to length 5 in 40.63369703292847 seconds


In [11]:
# Longest leave length for which expected values are computed below.
# It cannot exceed maximum_superleave_length, because `leaves` only holds
# entries up to that length.
# WARNING (from the author): larger values may break for now -- the number of
# leaves to evaluate grows quickly with length.
ev_calculator_max_length = 3

In [12]:
# Flatten the per-length leave lists, shortest leaves first, into one list
# that serves as the row index of the results table.
all_leaves = [
    leave
    for length in range(1, ev_calculator_max_length + 1)
    for leave in leaves[length]
]

# One row per leave; every statistic starts out empty and is filled in by the
# cells that follow.
ev_df = pd.DataFrame(
    index=all_leaves,
    columns=['mean', 'std', 'count', 'ev', 'synergy'],
)

To find all of the racks corresponding to a particular leave, we have added columns to the dataframe of plays df marking each letter (A, B, C...) and also for duplicates (AA, BB, CC...) and triplicates where possible (AAA, DDD, EEE...).

If the letters in a given leave are all different, we can look for rows by using df['A']&df['B']. However, if there are duplicates involved, we have to look for df['AA']. The following function gives the correct dataframe columns to be looked up.

In [None]:
def get_columns(leave):
    """Return the flag-column names that together identify ``leave``.

    Each distinct tile in the leave contributes one tag: the tile repeated as
    many times as it occurs, e.g. ``'AAB'`` -> ``['AA', 'B']``. These tags
    match the boolean columns added to ``df`` for single tiles and multiples.

    Parameters
    ----------
    leave : str
        The tiles of the leave; the tiles do not need to be sorted.

    Returns
    -------
    list of str
        One tag per distinct tile, in order of first appearance in ``leave``.
        An empty leave yields an empty list.
    """
    # Count each tile in a single pass. A dict keeps insertion order, so the
    # result is deterministic (iterating a set of strings is not stable
    # between interpreter runs).
    counts = {}
    for tile in leave:
        counts[tile] = counts.get(tile, 0) + 1

    return [tile * count for tile, count in counts.items()]

In [None]:
# For every leave, summarise the scores of all plays whose rack contains it.
for leave_length in range(1,ev_calculator_max_length+1):
    print(leave_length)
    t0 = time.time()

    for leave in leaves[leave_length]:
        # True for rows whose rack holds every tile of the leave (with multiplicity).
        condition = df[get_columns(leave)].all(axis=1)
        # Select the matching scores once and reuse them for all three statistics.
        scores = df.loc[condition, 'score']

        # Write with a single .loc[row, column] call. The chained form
        # ev_df.loc[leave]['mean'] = ... assigns into a temporary Series and
        # is not guaranteed to reach ev_df.
        ev_df.loc[leave, 'mean'] = scores.mean()
        ev_df.loc[leave, 'std'] = scores.std()
        ev_df.loc[leave, 'count'] = len(scores)

    t1 = time.time()
    print('Calculated mean, std and count in {} seconds'.format(t1-t0))

1
Calculated mean, std and count in 67.57009387016296 seconds
2


In [None]:
# pct: share of all plays, in percent, whose rack contains the leave.
# ev:  mean score of plays containing the leave minus the mean score of all plays.
ev_df = ev_df.assign(
    pct=100 * ev_df['count'] / len(df),
    ev=ev_df['mean'] - df['score'].mean(),
)

Calculate leave "synergy": the difference between the EV of the leave as a whole and the value we would expect from simply adding up the EVs of its individual tiles

In [None]:
# Synergy only applies to leaves of two or more tiles.
for leave_length in range(2,ev_calculator_max_length+1):
    for leave in leaves[leave_length]:
        # Value the tiles would have if their single-tile EVs simply added up.
        additive_ev = sum(ev_df.loc[tile, 'ev'] for tile in leave)
        # Assign with one .loc[row, column] call; the chained form
        # ev_df.loc[leave]['synergy'] = ... writes into a temporary Series and
        # is not guaranteed to reach ev_df.
        ev_df.loc[leave, 'synergy'] = ev_df.loc[leave, 'ev'] - additive_ev

In [None]:
# Display ev_df as this cell's output.
ev_df

In [None]:
# Save the leave statistics to CSV; the leave strings (the index) are written
# as the first column.
ev_df.to_csv('leave_values_010619_v3.csv')