# Fuzzy pattern LM experiments

In [None]:
from itertools import product
import numpy as np
import os
import pandas as pd
import time
from fuzzy_lm_experiment import FuzzyPatternLMExperiment, DatasetABA

In [None]:
def run(train_vocab_size=50):
    """Run a hyperparameter grid of FuzzyPatternLMExperiment on DatasetABA.

    The grid is the Cartesian product of the embedding dimensions, hidden
    dimensions, alphas and learning rates listed below. Each combination is
    run with ``n_trials=20``, ``max_iter=150`` and ``pretrain=False``, and a
    progress line with the mean accuracy and elapsed time is printed.

    Parameters
    ----------
    train_vocab_size : int, default 50
        Forwarded to ``FuzzyPatternLMExperiment`` as ``train_vocab_size``
        and recorded on every result row.

    Returns
    -------
    pandas.DataFrame
        One row per record returned by ``experiment.run()``, extended with
        the ``embed_dim``, ``hidden_dim``, ``train_vocab_size``, ``alpha``,
        ``learning_rate`` and ``max_iter`` used for that run.
    """
    max_iter = 150
    embed_dims = [2, 10, 25, 50, 100]
    hidden_dims = [2, 10, 25, 50, 100]
    alphas = [0.00001, 0.0001, 0.001]
    learning_rates = [0.0001, 0.001, 0.01]

    grid = list(product(embed_dims, hidden_dims, alphas, learning_rates))

    print(f"Running {len(grid)} experiments")

    data = []

    for embed_dim, hidden_dim, alpha, lr in grid:

        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments during a long run.
        start = time.perf_counter()

        # Flush so the "in progress" prefix is visible while the experiment
        # is still running (there is no trailing newline to trigger it).
        print(f"{embed_dim} {hidden_dim} {alpha} {lr}", end="...", flush=True)

        # NOTE(review): alpha and lr are iterated over and written to the
        # result rows, but they are not passed to the experiment here. As
        # written, runs that differ only in alpha/lr use identical settings.
        # Confirm the constructor's keyword names and forward them, or drop
        # them from the grid.
        experiment = FuzzyPatternLMExperiment(
            dataset_class=DatasetABA,
            embed_dim=embed_dim,
            hidden_dim=hidden_dim,
            n_trials=20,
            train_vocab_size=train_vocab_size,
            max_iter=max_iter,
            pretrain=False)

        preds = experiment.run()

        mu = np.mean([p['accuracy'] for p in preds])
        elapsed_time = round(time.perf_counter() - start, 0)
        print(f"mean: {mu}; took {elapsed_time} secs")

        settings = {
            'embed_dim': embed_dim,
            'hidden_dim': hidden_dim,
            'train_vocab_size': train_vocab_size,
            'alpha': alpha,
            'learning_rate': lr,
            'max_iter': max_iter}

        # Build new row dicts instead of mutating the records owned by the
        # experiment object.
        data.extend({**p, **settings} for p in preds)

    return pd.DataFrame(data)

In [None]:
def run_vocab_size_experiment(train_vocab_size, json=False):
    """Run the experiment grid for one ``train_vocab_size`` and save it.

    The results are written to
    ``results/fuzzy-lm-results-vocab<train_vocab_size>.csv`` without the
    ``correct``, ``incorrect``, ``n_correct`` and ``n_incorrect`` columns.

    Parameters
    ----------
    train_vocab_size : int
        Passed through to ``run`` and embedded in the output filename.
    json : bool, default False
        When true, the full frame (before the column drop) is also written
        next to the CSV with a ``.json`` extension, using
        ``orient='records'``.
    """
    output_dir = "results"
    # Create the output directory before the (long) grid run so a missing
    # directory cannot discard finished results at write time.
    os.makedirs(output_dir, exist_ok=True)

    df = run(train_vocab_size=train_vocab_size)

    output_filename = os.path.join(
        output_dir,
        "fuzzy-lm-results-vocab{}.csv".format(train_vocab_size))

    if json:
        # Swap only the extension rather than every ".csv" substring.
        json_output_filename = os.path.splitext(output_filename)[0] + ".json"
        df.to_json(json_output_filename, orient='records')

    # errors="ignore" keeps the CSV write from failing if the experiment
    # output lacks one of these columns (e.g. an empty result set).
    df = df.drop(
        columns=['correct', 'incorrect', 'n_correct', 'n_incorrect'],
        errors="ignore")
    # index is a boolean flag; False states the intent explicitly.
    df.to_csv(output_filename, index=False)

In [None]:
# Run the full grid with train_vocab_size=50; the CSV is written under results/.
run_vocab_size_experiment(train_vocab_size=50)

In [None]:
# Run the full grid with train_vocab_size=20; the CSV is written under results/.
run_vocab_size_experiment(train_vocab_size=20)

In [None]:
# Run the full grid with train_vocab_size=10; the CSV is written under results/.
run_vocab_size_experiment(train_vocab_size=10)