#  Experiment Results

In [None]:
def element_frequency(list):
    """Count how many times each element occurs in ``list``.

    Args:
        list: Iterable of hashable elements. The parameter name shadows the
            builtin; it is kept as-is so existing callers are unaffected.

    Returns:
        dict: Maps each distinct element to its number of occurrences, with
        keys in the order the elements were first encountered.
    """
    counts = {}
    for item in list:
        # Missing keys start from 0, so the first sighting stores 1.
        counts[item] = counts.get(item, 0) + 1
    return counts

In [None]:
# Root directory whose entries are scanned as experiment folders; each folder
# is expected to hold a "stats.json" file and a ".yml" configuration file.
REPORT_DIR = "reports"

In [None]:
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
import yaml
import seaborn as sns

# Show every column when a DataFrame is displayed instead of truncating.
pd.set_option('display.max_columns', None)

# Two-level header of the results table, written as (group, [sub-labels]).
# The flattened order must match the order of the values stored in each row.
_header_groups = [
    ("Model", [""]),
    ("Version", [""]),
    ("Language", [""]),
    ("# Attempts", ["Mean"]),
    ("% Completion", ["Mean"]),
    (
        "Wheel",
        [
            "% lost matches",
            "% win matches",
            "mean vowels buyed",
            "Mean duplicated letters",
            "Mean final budget",
        ],
    ),
    ("Guesses", ["Guesses len mean (right|wrong)"]),
    (
        "Strategy",
        [
            "# first couple diversity",
            "# first triple diversity",
        ],
    ),
    (
        "Game lost",
        [
            "% insufficent budget error",
            "% round limit error",
            "% letter not in sentence error",
            "% vowel not allowed error",
            "% consonant not allowed error",
            "% guess error",
            "% instruction error",
        ],
    ),
    ("Log", ["Folder"]),
]

# One row per table column: position 0 is the group, position 1 the sub-label.
column_names = pd.DataFrame(
    [
        [group, sub_label]
        for group, sub_labels in _header_groups
        for sub_label in sub_labels
    ]
)

# One list of values per experiment, in the column order of `column_names`.
rows = []

# "<player name> - <version>" -> {first two / three letters joined: count}.
couple_letter_freq = {}
triple_letter_freq = {}

for experiment_dir in os.listdir(REPORT_DIR):
    experiment_path = os.path.join(REPORT_DIR, experiment_dir)
    stat_file = os.path.join(experiment_path, "stats.json")

    # Entries without a stats.json are not (finished) experiments: skip them.
    if not os.path.exists(stat_file):
        continue

    # Reset per-experiment state so nothing leaks over from the previous
    # folder when this one has no config or no letter distribution.
    exp_file = None
    yaml_file = None
    couple_diversity = float("nan")
    triple_diversity = float("nan")

    # Load the first .yml configuration file found in the folder.
    for file in os.listdir(experiment_path):
        if not file.endswith(".yml"):
            continue
        exp_file = file[:-4]  # file name without the ".yml" suffix
        with open(os.path.join(experiment_path, file)) as stream:
            try:
                yaml_file = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)
        break

    # A missing, unparsable or empty config cannot describe the experiment.
    if not isinstance(yaml_file, dict):
        print(f"Skipping {experiment_dir}: no usable .yml configuration found")
        continue

    # Experiments that do not declare a language default to "en".
    lang = yaml_file.get('language', "en")

    # Open stat file.
    with open(stat_file, "r") as f:
        stats = json.load(f)

    if 'letter_distribution' in stats:
        letter_order = stats['letter_distribution']['letters_order']
        letters_freq = stats['letter_distribution']['letters_freq']
        df_letter_freq = pd.DataFrame.from_dict(letter_order, orient='index').fillna(0)

        # Collect the first two and first three entries of every sequence in
        # `letters_freq`; sequences that are too short contribute nothing.
        # NOTE(review): presumably each value is the ordered letters played
        # in one game — confirm against the code that writes stats.json.
        first_2, first_3 = [], []
        for letters in letters_freq.values():
            if len(letters) < 2:
                continue
            first_2.append(letters[0] + letters[1])
            if len(letters) < 3:
                continue
            first_3.append(letters[0] + letters[1] + letters[2])

        couple_freq = element_frequency(first_2)
        triple_freq = element_frequency(first_3)

        # Diversity = number of distinct openings observed.
        couple_diversity = len(couple_freq)
        triple_diversity = len(triple_freq)

        model_label = f"{yaml_file['player']['name']} - {yaml_file['version']}"
        couple_letter_freq[model_label] = couple_freq
        triple_letter_freq[model_label] = triple_freq

    wheel = stats['wheel']

    letter_not_in_sentence = float(wheel['% letter not in sentence'])
    vowel_not_allowed_error = float(wheel['% vowel not allowed error'])
    consonant_not_allowed_error = float(wheel['% consonant not allowed error'])
    guess_error = float(wheel['% guess error'])
    instruction_error = float(wheel['% instruction_error'])

    rows.append(
        [
            exp_file,
            yaml_file['version'],
            lang,
            f"{stats['attempts']['mean']} ± {stats['attempts']['std']}",
            f"{stats['completion%']['mean']} ± {stats['completion%']['std']}",
            float(wheel['% lost matches']),
            float(wheel['% win matches']),
            f"{wheel['mean vowels buyed']} ± {wheel['std vowels buyed']}",
            f"{wheel['mean duplicated letters']} ± {wheel['std duplicated letters']}",
            f"{wheel['mean budget']} ± {wheel['std budget']}",
            f"{float(wheel['right guesses length mean'])} | {float(wheel['wrong guesses length mean'])}",
            # Rendered as "nan" when the stats carry no letter distribution.
            f"{couple_diversity:.0f}",
            f"{triple_diversity:.0f}",
            float(wheel['% insufficent budget']),
            float(wheel['% round limit error']),
            letter_not_in_sentence,
            vowel_not_allowed_error,
            consonant_not_allowed_error,
            guess_error,
            instruction_error,
            experiment_dir,
        ]
    )
    
        

# Two side-by-side bar charts: how often each model opened with a given
# couple (left) or triple (right) of letters.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(25, 6))

# Flatten {model: {couple: count}} into (couple, count, model) records.
couple_data = [
    (couple, freq, model)
    for model, counts in couple_letter_freq.items()
    for couple, freq in counts.items()
]
df_couple = pd.DataFrame(couple_data, columns=["Couple", "Frequency", "Model"])
# Bars are ordered by total frequency across all models, largest first.
couple_totals = df_couple.groupby('Couple')['Frequency'].sum()
couple_order = couple_totals.sort_values(ascending=False).index

# Same flattening and ordering for the triples.
triple_data = [
    (triple, freq, model)
    for model, counts in triple_letter_freq.items()
    for triple, freq in counts.items()
]
df_triple = pd.DataFrame(triple_data, columns=["Triple", "Frequency", "Model"])
triple_totals = df_triple.groupby('Triple')['Frequency'].sum()
triple_order = triple_totals.sort_values(ascending=False).index

# (axis, data, bar order, x column, title, x label) for each panel.
panels = (
    (axes[0], df_couple, couple_order, "Couple", 'Couple Frequency', 'Couples'),
    (axes[1], df_triple, triple_order, "Triple", 'Triple Frequency', 'Triples'),
)
for ax, frame, bar_order, x_column, title, x_label in panels:
    sns.barplot(
        x=x_column,
        y="Frequency",
        hue="Model",
        data=frame,
        ax=ax,
        order=bar_order,
    )
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

# Build the results table under the two-level header and rank the experiments
# by win percentage, best first, with a fresh 0..n-1 index.
columns = pd.MultiIndex.from_frame(column_names)

df = pd.DataFrame(rows, columns=columns).sort_values(
    by=('Wheel', '% win matches'),
    ascending=False,
    ignore_index=True,
)

# Last expression of the cell: displays the table in the notebook.
df

In [None]:
# Keep only the rows whose version is 'no-letter-loss'.
is_no_letter_loss = df[('Version', '')] == 'no-letter-loss'
df_nll = df[is_no_letter_loss]

# x axis: the mean taken from the "mean ± std" strings of the attempts column.
attempts_text = df_nll[('# Attempts', 'Mean')].str.split('±').str[0]
attempts_mean_nll = attempts_text.astype(float)
# y axis: guess-error percentage of the same rows.
guess_error_nll = df_nll[('Game lost', '% guess error')]

# Scatter plot with one point per model.
plt.figure(figsize=(10, 6))
plt.scatter(attempts_mean_nll, guess_error_nll)

# Label every point with its model name, nudged 5 points up and to the right.
model_names = df_nll[('Model', '')]
for model_name, x, y in zip(model_names, attempts_mean_nll, guess_error_nll):
    plt.annotate(
        model_name,
        (x, y),
        fontsize=9,
        xytext=(5, 5),
        textcoords='offset points',
    )

plt.title('Guess Error vs # Attempts per Model (no-letter-loss only)')
plt.xlabel('# Attempts (Mean)')
plt.ylabel('% Guess Error')
plt.grid(True)
plt.show()