# Get Kaggle Kernel Results

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns

from kaggle.api.kaggle_api_extended import KaggleApi

# Apply seaborn's default theme to subsequent matplotlib plots.
sns.set()



## Connect to Kaggle

In [None]:
# Create the Kaggle API client and authenticate it before any API call is made.
# NOTE(review): authenticate() presumably reads credentials from the local
# Kaggle config (kaggle.json) or environment variables -- confirm for the
# installed kaggle package version.
api = KaggleApi()
api.authenticate()



## Get Kernels

In [None]:
# List my own kernels that match the search term 'wordle-'.
# NOTE(review): a single page of at most 40 results is requested; if more
# matching kernels exist they would presumably be missed -- confirm.
kernels = api.kernels_list(search='wordle-', mine=True, page_size=40)

In [None]:
# Download the output of every completed LF / GYX kernel that is not on disk yet.
for kernel in kernels:
    experiment_name = kernel.ref.replace('chrischow/', '')

    # Pick the target folder and the name of the results CSV for this kernel.
    if 'wordlebot-lf-' in kernel.ref:
        download_path = './kernel_output/lf/'
        filename = f"{experiment_name}.csv"
    elif 'wordlebot-gyx-' in kernel.ref:
        download_path = './kernel_output/gyx/'
        filename = f"{experiment_name.replace('gyx', 'expected_gyx')}.csv"
    else:
        # Any other kernel family (e.g. 'wordlebot-ncands-') is not handled
        # here. Skipping explicitly avoids reusing a stale download_path /
        # filename left over from a previous iteration.
        continue

    # Make sure the folder exists so the existence check below cannot fail.
    os.makedirs(download_path, exist_ok=True)

    # Check the local copy first: it is cheap and saves a status request for
    # every kernel that has already been downloaded.
    if os.path.isfile(os.path.join(download_path, filename)):
        continue

    # Only finished kernels have output worth downloading.
    if api.kernels_status(kernel.ref)['status'] != 'complete':
        continue

    print(f"Downloading {experiment_name}...")
    api.kernels_output(kernel.ref, path=download_path)

## Get Data

In [None]:
# Build the per-experiment summary table for the LF kernels (df_lf) and start
# the combined table of raw per-game results (all_data).
lf_records = []
lf_frames = []

for kernel in kernels:
    if 'wordlebot-lf-' not in kernel.ref:
        continue

    experiment_name = kernel.ref.replace('chrischow/', '')
    csv_path = f"kernel_output/lf/{experiment_name}.csv"
    log_path = f"kernel_output/lf/{experiment_name}.log"

    # The download step skips kernels that have not completed, so their files
    # may be missing; report and skip them instead of crashing in read_csv.
    if not (os.path.isfile(csv_path) and os.path.isfile(log_path)):
        print(f"Skipping {experiment_name}: output not found in kernel_output/lf/")
        continue

    # The ref is split on '-': the second piece is used as the ranking
    # algorithm and the last piece as the seed word.
    names = kernel.ref.split('-')
    word = names[-1]
    method = names[1]

    temp_df = pd.read_csv(csv_path)

    with open(log_path) as f:
        log_text = f.read()
    # SECURITY NOTE(review): eval() executes whatever the log file contains.
    # Acceptable only because the logs come from our own kernels; consider a
    # safe parser (json.loads / ast.literal_eval) if the format allows it.
    # The 'time' value of the last log entry is taken as the total runtime.
    runtime = pd.DataFrame(eval(log_text)).time.iloc[-1]

    lf_records.append({
        'Seed Word': word,
        'Ranking Algorithm': method,
        'Runtime': runtime,
        'Mean Steps': temp_df.steps.mean(),
        'Success Rate': temp_df.steps.le(6).mean(),
        '3-Steps or Less': temp_df.steps.le(3).mean(),
        'Worst Case': temp_df.steps.max(),
    })

    temp_df['method'] = 'lf'
    lf_frames.append(temp_df)

# DataFrame.append was removed in pandas 2.0; build the frames once from the
# collected pieces instead (also avoids re-copying the data on every iteration).
df_lf = pd.DataFrame(lf_records)
all_data = pd.concat(lf_frames) if lf_frames else pd.DataFrame()
        
# df_lf['steps_rank'] = df_lf['Mean Steps'].rank()
# df_lf['success_rank'] = df_lf['Success Rate'].rank(ascending=False)
# df_lf['threestep_rank'] = df_lf['3-Steps or Less'].rank(ascending=False)
# df_lf['max_rank'] = df_lf['Worst Case'].rank()
# df_lf['avg_rank'] = df_lf[['steps_rank', 'success_rank', 'threestep_rank']].mean(axis=1)

In [None]:
# Build the per-experiment summary table for the GYX kernels (df_gyx) and add
# their raw per-game results to all_data.
gyx_records = []
gyx_frames = []

for kernel in kernels:
    if 'wordlebot-gyx-' not in kernel.ref:
        continue
    # Kernels that have not completed are skipped.
    if api.kernels_status(kernel.ref)['status'] != 'complete':
        continue

    # The ref is split on '-': the second piece is used as the ranking
    # algorithm and the last piece as the seed word.
    names = kernel.ref.split('-')
    word = names[-1]
    method = names[1]

    experiment_name = kernel.ref.replace('chrischow/', '')
    # The results CSV is named with 'expected_gyx' in place of 'gyx'; the log
    # keeps the plain experiment name.
    temp_df = pd.read_csv(
        f"kernel_output/gyx/{experiment_name.replace('gyx', 'expected_gyx')}.csv"
    )

    with open(f"kernel_output/gyx/{experiment_name}.log") as f:
        log_text = f.read()
    # SECURITY NOTE(review): eval() executes whatever the log file contains.
    # Acceptable only because the logs come from our own kernels; consider a
    # safe parser (json.loads / ast.literal_eval) if the format allows it.
    # The 'time' value of the last log entry is taken as the total runtime.
    runtime = pd.DataFrame(eval(log_text)).time.iloc[-1]

    gyx_records.append({
        'Seed Word': word,
        'Ranking Algorithm': method,
        'Runtime': runtime,
        'Mean Steps': temp_df.steps.mean(),
        'Success Rate': temp_df.steps.le(6).mean(),
        '3-Steps or Less': temp_df.steps.le(3).mean(),
        'Worst Case': temp_df.steps.max(),
    })

    temp_df['method'] = 'gyx'
    gyx_frames.append(temp_df)

# DataFrame.append was removed in pandas 2.0; assemble the frames with a single
# construction / concat instead of growing them row by row.
df_gyx = pd.DataFrame(gyx_records)
if gyx_frames:
    # Leave an empty all_data out of the concat so it cannot affect dtypes.
    pieces = gyx_frames if all_data.empty else [all_data, *gyx_frames]
    all_data = pd.concat(pieces)
        
# df_gyx['steps_rank'] = df_gyx['Mean Steps'].rank()
# df_gyx['success_rank'] = df_gyx['Success Rate'].rank(ascending=False)
# df_gyx['threestep_rank'] = df_gyx['3-Steps or Less'].rank(ascending=False)
# df_gyx['max_rank'] = df_gyx['Worst Case'].rank()
# df_gyx['avg_rank'] = df_gyx[['steps_rank', 'success_rank', 'threestep_rank']].mean(axis=1)

In [None]:
# Stack the LF and GYX summaries into one table with a fresh row index.
df = pd.concat([df_lf, df_gyx], axis=0, ignore_index=True)

# Rank every experiment on each metric so that rank 1 is the best value:
# 'Mean Steps' is ranked ascending, the two rate columns descending.
rank_specs = {
    'steps_rank': ('Mean Steps', True),
    'success_rank': ('Success Rate', False),
    'threestep_rank': ('3-Steps or Less', False),
}
for rank_col, (metric, ascending) in rank_specs.items():
    df[rank_col] = df[metric].rank(ascending=ascending)

# Overall score: the mean of the three per-metric ranks.
df['avg_rank'] = df[list(rank_specs)].mean(axis=1)

In [None]:
# Display all experiments ordered by average rank, lowest (best) first.
df.sort_values('avg_rank')

In [13]:
# Display, per ranking algorithm, the experiment with the lowest 'Mean Steps':
# rows are sorted ascending and the first entry of each group is kept.
# NOTE(review): groupby().first() takes the first non-null value per column,
# so a row with missing values could be mixed with later rows -- confirm none exist.
df.sort_values('Mean Steps').groupby('Ranking Algorithm').first()

In [14]:
# Display how many experiments (non-null 'Ranking Algorithm' entries) exist per seed word.
df.groupby('Seed Word')['Ranking Algorithm'].count()

## All Data

In [None]:
# Tidy the combined per-game results and derive the first and last guess of
# every game.

# 'Unnamed: 0' is presumably the row index that was written into the CSVs.
# errors='ignore' keeps this cell re-runnable once the column is gone.
all_data = all_data.drop(columns='Unnamed: 0', errors='ignore')

# The 'words' column is read from CSV as text; turn it back into a Python
# object. Values that are already parsed (cell re-run) are left untouched.
# SECURITY NOTE(review): eval() executes whatever text the column contains.
# Acceptable only because the CSVs come from our own kernels; consider
# ast.literal_eval if the values are plain literals.
all_data['words'] = all_data.words.apply(
    lambda value: eval(value) if isinstance(value, str) else value
)

# First element of each 'words' sequence -> 'word'; last element -> 'solution'.
all_data['word'] = all_data.words.apply(lambda x: x[0])
all_data['solution'] = all_data.words.apply(lambda x: x[-1])