# Get Kaggle Kernel Results

In [5]:
import ast
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from kaggle.api.kaggle_api_extended import KaggleApi

# Apply seaborn's default styling to the matplotlib figures drawn in this notebook
sns.set()

## Connect to Kaggle

In [6]:
# Create the Kaggle API client used by every later cell and authenticate it.
# NOTE(review): credentials are presumably read from the local Kaggle config
# (e.g. ~/.kaggle/kaggle.json) — confirm before running on a new machine.
api = KaggleApi()
api.authenticate()



## Get Kernels

In [7]:
# List my own kernels matching 'wordle-'. Only one page of up to 40 results is
# requested, so any kernels beyond that would be missed — TODO confirm 40 is enough.
kernels = api.kernels_list(search='wordle-', mine=True, page_size=40)

In [8]:
# Download the output of every completed LF / GYX kernel that is not cached locally yet
for kernel in kernels:
    experiment_name = kernel.ref.replace('chrischow/', '')

    # Resolve the target per kernel. Anything that is neither an LF nor a GYX
    # experiment is skipped, so `download_path` / `filename` can never be unbound
    # or left over from a previous iteration.
    if 'wordlebot-lf-' in kernel.ref:
        download_path = './kernel_output/lf/'
        filename = f"{experiment_name}.csv"
    elif 'wordlebot-gyx-' in kernel.ref:
        download_path = './kernel_output/gyx/'
        filename = f"{experiment_name.replace('gyx', 'expected_gyx')}.csv"
#     elif 'wordlebot-ncands-' in kernel.ref:
#         download_path = './kernel_output/ncands/'
    else:
        continue

    # A fresh checkout has no output folders yet; create them instead of failing.
    os.makedirs(download_path, exist_ok=True)
    if os.path.exists(os.path.join(download_path, filename)):
        continue

    # The status lookup is a network call, so only make it when a download is needed.
    if api.kernels_status(kernel.ref)['status'] != 'complete':
        continue

    print(f"Downloading {experiment_name}...")
    api.kernels_output(kernel.ref, path=download_path)

## Get Data

In [9]:
# Summarise every LF kernel: one summary row per kernel goes into `df_lf`, and the
# raw per-game rows (tagged with method 'lf') become the start of `all_data`.
lf_summaries = []
lf_frames = []

for kernel in kernels:
    if 'wordlebot-lf-' not in kernel.ref:
        continue

    experiment_name = kernel.ref.replace('chrischow/', '')
    # Splitting the ref on '-': element 1 is used as the ranking algorithm and the
    # last element as the seed word.
    names = kernel.ref.split('-')
    word = names[-1]
    method = names[1]
    temp_df = pd.read_csv(f"kernel_output/lf/{experiment_name}.csv")

    # The log is parsed as a Python literal. ast.literal_eval replaces eval here so
    # that downloaded text can never execute code.
    with open(f"kernel_output/lf/{experiment_name}.log") as f:
        log_records = ast.literal_eval(f.read().strip())
    # Runtime is taken from the `time` field of the last log record.
    runtime = pd.DataFrame(log_records).time.iloc[-1]

    lf_summaries.append({
        'Seed Word': word,
        'Ranking Algorithm': method,
        'Runtime': runtime,
        'Mean Steps': temp_df.steps.mean(),
        'Success Rate': temp_df.steps.le(6).mean(),
        '3-Steps or Less': temp_df.steps.le(3).mean(),
        'Worst Case': temp_df.steps.max()
    })

    temp_df['method'] = 'lf'
    lf_frames.append(temp_df)

# Build both frames once at the end: DataFrame.append no longer exists in pandas 2.x
# and growing a frame inside the loop copies it on every iteration.
df_lf = pd.DataFrame(lf_summaries)
all_data = pd.concat(lf_frames) if lf_frames else pd.DataFrame()
        
# df_lf['steps_rank'] = df_lf['Mean Steps'].rank()
# df_lf['success_rank'] = df_lf['Success Rate'].rank(ascending=False)
# df_lf['threestep_rank'] = df_lf['3-Steps or Less'].rank(ascending=False)
# df_lf['max_rank'] = df_lf['Worst Case'].rank()
# df_lf['avg_rank'] = df_lf[['steps_rank', 'success_rank', 'threestep_rank']].mean(axis=1)

In [10]:
# Summarise every completed GYX kernel: one summary row per kernel goes into
# `df_gyx`, and the raw per-game rows (tagged with method 'gyx') are added to `all_data`.
gyx_summaries = []
gyx_frames = []

for kernel in kernels:
    if 'wordlebot-gyx-' not in kernel.ref:
        continue
    # Kernels that have not finished are skipped.
    if api.kernels_status(kernel.ref)['status'] != 'complete':
        continue

    experiment_name = kernel.ref.replace('chrischow/', '')
    # Splitting the ref on '-': element 1 is used as the ranking algorithm and the
    # last element as the seed word.
    names = kernel.ref.split('-')
    word = names[-1]
    method = names[1]
    temp_df = pd.read_csv(
        f"kernel_output/gyx/{experiment_name.replace('gyx', 'expected_gyx')}.csv"
    )

    # The log is parsed as a Python literal. ast.literal_eval replaces eval here so
    # that downloaded text can never execute code.
    with open(f"kernel_output/gyx/{experiment_name}.log") as f:
        log_records = ast.literal_eval(f.read().strip())
    # Runtime is taken from the `time` field of the last log record.
    runtime = pd.DataFrame(log_records).time.iloc[-1]

    gyx_summaries.append({
        'Seed Word': word,
        'Ranking Algorithm': method,
        'Runtime': runtime,
        'Mean Steps': temp_df.steps.mean(),
        'Success Rate': temp_df.steps.le(6).mean(),
        '3-Steps or Less': temp_df.steps.le(3).mean(),
        'Worst Case': temp_df.steps.max()
    })

    temp_df['method'] = 'gyx'
    gyx_frames.append(temp_df)

# Build the summary frame once: DataFrame.append no longer exists in pandas 2.x
# and growing a frame inside the loop copies it on every iteration.
df_gyx = pd.DataFrame(gyx_summaries)

# Drop GYX rows left by an earlier run of this cell so re-running it does not
# duplicate them in `all_data`.
previous_rows = all_data
if 'method' in previous_rows.columns:
    previous_rows = previous_rows.loc[previous_rows['method'] != 'gyx']
parts = [frame for frame in [previous_rows, *gyx_frames] if not frame.empty]
all_data = pd.concat(parts) if parts else previous_rows
        
# df_gyx['steps_rank'] = df_gyx['Mean Steps'].rank()
# df_gyx['success_rank'] = df_gyx['Success Rate'].rank(ascending=False)
# df_gyx['threestep_rank'] = df_gyx['3-Steps or Less'].rank(ascending=False)
# df_gyx['max_rank'] = df_gyx['Worst Case'].rank()
# df_gyx['avg_rank'] = df_gyx[['steps_rank', 'success_rank', 'threestep_rank']].mean(axis=1)

In [11]:
# Stack the LF and GYX summaries, then rank every experiment on three metrics.
df = pd.concat([df_lf, df_gyx], axis=0, ignore_index=True)

# rank column -> (metric column, rank ascending?). Fewer mean steps is better, so it
# ranks ascending; higher rates are better, so they rank descending.
rank_specs = {
    'steps_rank': ('Mean Steps', True),
    'success_rank': ('Success Rate', False),
    'threestep_rank': ('3-Steps or Less', False),
}
for rank_column, (metric, ascending) in rank_specs.items():
    df[rank_column] = df[metric].rank(ascending=ascending)

# Overall score: the plain mean of the three ranks.
df['avg_rank'] = df[list(rank_specs)].mean(axis=1)

In [12]:
# Show the experiments ordered by average rank, lowest first
df.sort_values(by='avg_rank', ascending=True)

Unnamed: 0,Seed Word,Ranking Algorithm,Runtime,Mean Steps,Success Rate,3-Steps or Less,Worst Case,steps_rank,success_rank,threestep_rank,avg_rank
13,tores,lf,232.967899,3.730022,0.991793,0.425054,8,2.0,4.5,2.5,3.0
5,stare,lf,236.935718,3.728726,0.991793,0.423758,9,1.0,4.5,4.0,3.166667
7,tares,lf,225.392809,3.739093,0.991793,0.419438,8,3.0,4.5,5.0,4.166667
12,tales,lf,233.820936,3.749028,0.994384,0.39568,8,6.0,1.0,10.0,5.666667
6,arose,lf,223.036801,3.739525,0.990065,0.429806,8,4.0,13.5,1.0,6.166667
3,raise,lf,232.002648,3.742549,0.988769,0.425054,9,5.0,17.5,2.5,8.333333
15,roate,lf,196.688738,3.757235,0.990497,0.417279,9,7.0,11.5,7.0,8.5
9,rales,lf,203.969526,3.784881,0.993952,0.387905,8,11.0,2.0,13.0,8.666667
10,rates,lf,239.049976,3.776242,0.990497,0.40432,9,9.0,11.5,8.0,9.5
1,arles,lf,234.079909,3.798704,0.990929,0.388337,10,12.0,8.5,12.0,10.833333


In [13]:
# For each ranking algorithm, show the leading entry after sorting by mean steps
# (groupby().first() takes the first non-null value of every column).
by_mean_steps = df.sort_values(by='Mean Steps')
by_mean_steps.groupby('Ranking Algorithm').first()

Unnamed: 0_level_0,Seed Word,Runtime,Mean Steps,Success Rate,3-Steps or Less,Worst Case,steps_rank,success_rank,threestep_rank,avg_rank
Ranking Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
gyx,stare,10812.406851,3.831965,0.989201,0.377538,9,17.0,15.5,18.0,16.833333
lf,stare,236.935718,3.728726,0.991793,0.423758,9,1.0,4.5,4.0,3.166667


In [14]:
# Count how many ranking-algorithm results exist for each seed word
df.groupby(by='Seed Word')['Ranking Algorithm'].agg('count')

Seed Word
arles    2
arose    2
dares    2
lares    2
lores    2
nares    2
raile    2
raise    2
rales    2
rates    2
reais    2
roate    2
soare    2
stare    2
tales    2
tares    2
tores    2
Name: Ranking Algorithm, dtype: int64

## All Data

In [15]:
# Clean up the per-game rows and derive the first guess and the solution of each game.
if 'Unnamed: 0' in all_data.columns:
    all_data = all_data.drop('Unnamed: 0', axis=1)

# `words` is presumably the string repr of the list of guesses as read from the CSV.
# It is parsed with ast.literal_eval instead of eval so file content can never execute
# code, and values that are already parsed are left alone so the cell can be re-run.
all_data['words'] = all_data['words'].apply(
    lambda guesses: ast.literal_eval(guesses) if isinstance(guesses, str) else guesses
)
# First element of the sequence -> `word`, last element -> `solution`.
all_data['word'] = all_data['words'].apply(lambda guesses: guesses[0])
all_data['solution'] = all_data['words'].apply(lambda guesses: guesses[-1])

In [23]:
# Mean number of steps per (solution, method) pair, keeping only the pairs whose
# mean exceeds six steps, worst first.
mean_steps = all_data.groupby(['solution', 'method'])['steps'].mean().reset_index()
over_six = mean_steps['steps'] > 6
unsolved = mean_steps[over_six].sort_values(by='steps', ascending=False)

In [26]:
# Raise the display limit to 100 rows, then list the table alphabetically by solution
pd.set_option('display.max_rows', 100)
unsolved.sort_values(by='solution')

Unnamed: 0,solution,method,steps
292,baker,gyx,6.117647
317,baste,lf,6.941176
316,baste,gyx,7.235294
318,batch,gyx,7.294118
460,bluer,gyx,6.117647
514,boxer,gyx,7.823529
515,boxer,lf,7.764706
579,brown,lf,6.411765
578,brown,gyx,6.294118
624,buyer,gyx,6.529412
