# Hyperparameter Tuning Analysis

In [112]:
import pandas as pd
import plotly.express as px
# Route pandas' built-in DataFrame.plot / Series.plot calls through plotly
# rather than the default matplotlib backend.
# NOTE(review): the cells visible here build figures with plotly.express directly
# and never call .plot — confirm this option is still needed.
pd.options.plotting.backend = "plotly"

In [113]:
# Load the tuning results and cast the two discrete hyperparameter columns to
# integers in one pass.
df = pd.read_csv('hparams_table.csv').astype({'batch_size': int, 'hidden_units': int})
df

Unnamed: 0,batch_size,hidden_units,learning_rate,model,1-gram %,2-gram %,3-gram %,4-gram %,train_loss,val_loss
0,64,512,0.0010,lstm2,95.906433,64.705879,15.976332,1.785714,0.987554,1.159926
1,32,256,0.0100,lstm2,88.304092,50.000000,16.568047,2.976191,1.264080,1.318123
2,128,256,0.0001,lstm2,53.757225,16.279070,1.169591,0.000000,1.655355,1.669039
3,64,256,0.0001,rnn,68.604652,30.409357,5.882353,0.591716,1.481291,1.509107
4,64,512,0.0010,rnn,90.532547,54.166668,15.568862,3.614458,1.174806,1.256873
...,...,...,...,...,...,...,...,...,...,...
103,64,768,0.0010,lstm,91.124260,52.976189,13.173653,0.602410,1.003962,1.157716
104,32,1024,0.0100,lstm,87.573967,47.023811,12.574850,3.012048,1.228965,1.286210
105,32,768,0.0001,lstm,88.505745,52.601154,12.790698,2.339181,1.213736,1.268522
106,64,512,0.0100,lstm,89.830505,54.545456,17.142857,2.298851,1.206797,1.269713


In [114]:
# Hyperparameter columns of `df`; the summary below and the per-hyperparameter
# plotting loops further down iterate over this list.
hyperparameters = ['batch_size', 'hidden_units', 'learning_rate']

# Print every distinct value that was tried for the model type and for each
# hyperparameter, in sorted order.
print("Tested hyperparameters:")
print("Models:", sorted(df['model'].unique()))
for hp in hyperparameters:
    # The outer f-string is single-quoted because the replacement field contains
    # double-quoted literals; reusing the same quote character inside an f-string
    # is a SyntaxError on Python versions before 3.12.
    print(f'{hp.replace("_", " ").title()}: {sorted(df[hp].unique())}')

Tested hyperparameters:
Models: ['lstm', 'lstm2', 'rnn']
Batch Size: [32, 64, 128]
Hidden Units: [256, 512, 768, 1024]
Learning Rate: [0.0001, 0.001, 0.01]


## Parallel Coordinates Plot

In [115]:
# Column name -> axis label for the parallel-coordinates plot. The insertion
# order of this mapping is the left-to-right order of the axes.
labels = {
    'batch_size': 'Batch Size',
    'hidden_units': 'Hidden Units',
    'learning_rate': 'Learning Rate',
    'val_loss': 'Val Loss',
}

# No existence check is performed here: every listed column is handed straight
# to plotly, which needs all of them to be present in df.
dimensions = list(labels)

# 'val_loss' is both the last axis and the value that colors each line.
fig = px.parallel_coordinates(
    df,
    dimensions=dimensions,
    color='val_loss',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels=labels,
    title='Hyperparameter and Metric Parallel Coordinates',
)
fig.show()

## Best Hyperparameter Value Analysis

### Impact on N-gram Scores

In [116]:
ngram_cols = ['1-gram %', '2-gram %', '3-gram %', '4-gram %']

# One grouped bar chart per hyperparameter: x = hyperparameter value,
# one bar per N-gram column, bar height = mean score over all matching runs.
for hp in hyperparameters:
    hp_label = hp.replace("_", " ").title()

    # Average each N-gram column per hyperparameter value, then reshape to
    # long form (one row per value / N-gram pair) as px.bar expects.
    ngram_long = (
        df.groupby(hp)[ngram_cols]
        .mean()
        .reset_index()
        .melt(
            id_vars=hp,
            value_vars=ngram_cols,
            var_name='N-gram',
            value_name='Average N-gram Score',
        )
    )

    fig = px.bar(
        ngram_long,
        x=hp,
        y='Average N-gram Score',
        color='N-gram',
        barmode='group',
        title=f'Average N-gram Scores vs {hp_label}',
        text_auto='.2f',
    )
    fig.update_layout(xaxis_title=hp_label)
    # Put the value labels above the bars.
    fig.update_traces(textposition='outside')
    # Categorical axis: the tested values are discrete settings, so they are
    # spaced evenly instead of on a numeric scale.
    fig.update_xaxes(type='category')
    fig.show()

### Impact on Validation Loss

In [117]:
# One grouped bar chart per hyperparameter: x = hyperparameter value,
# one bar per model, bar height = mean validation loss of the matching runs.
for hp in hyperparameters:
    hp_label = hp.replace("_", " ").title()

    # Mean val_loss for every (hyperparameter value, model) pair.
    mean_val_loss = (
        df.groupby([hp, 'model'])['val_loss']
        .mean()
        .reset_index()
    )

    fig = px.bar(
        mean_val_loss,
        x=hp,
        y='val_loss',
        color='model',
        barmode='group',
        title=f'Average Validation Loss vs {hp_label} (Grouped by Model)',
        text_auto='.3f',
    )
    fig.update_layout(xaxis_title=hp_label, yaxis_title="Average Validation Loss")
    # Put the value labels above the bars.
    fig.update_traces(textposition='outside')
    # Categorical axis: the tested values are discrete settings, so they are
    # spaced evenly instead of on a numeric scale.
    fig.update_xaxes(type='category')
    fig.show()

## Top 5 Parameter Combinations per Model by Validation Loss

In [118]:
# Rank runs by this column; ascending order puts the lowest value first.
sort_metric = 'val_loss'
ascending_order = True
print(f'Finding best parameters based on lowest {sort_metric.replace("_", " ")}\n')

# Models in the order they first appear in df.
model_names = df['model'].unique()

# Columns shown in each per-model table (identical for every model).
display_cols = [
    'model', 'batch_size', 'hidden_units', 'learning_rate', sort_metric,
    '1-gram %', '2-gram %', '3-gram %', '4-gram %',
]

# For each model, print its five best runs as a plain-text table.
for model_name in model_names:
    print(f"--- Top 5 for Model: {model_name} ---")
    model_runs = df.loc[df['model'] == model_name]
    best_runs = model_runs.sort_values(by=sort_metric, ascending=ascending_order).iloc[:5]
    print(best_runs[display_cols].to_string(index=False))
    print("\n")

Finding best parameters based on lowest val loss

--- Top 5 for Model: lstm2 ---
model  batch_size  hidden_units  learning_rate  val_loss  1-gram %  2-gram %  3-gram %  4-gram %
lstm2          64          1024          0.001  1.142094 94.082840 58.928570 18.562874  4.216867
lstm2          32          1024          0.001  1.143769 90.697678 65.497078 22.941177  4.142012
lstm2          32           768          0.001  1.146122 92.857140 68.508286 31.666666  7.821229
lstm2         128          1024          0.001  1.147708 93.023254 61.403507 26.470589  2.958580
lstm2          64           768          0.001  1.148422 94.350281 62.500000 19.428572  2.873563


--- Top 5 for Model: rnn ---
model  batch_size  hidden_units  learning_rate  val_loss  1-gram %  2-gram %  3-gram %  4-gram %
  rnn         128          1024          0.001  1.213275 95.652176 61.748634 19.230770  3.314917
  rnn          64          1024          0.001  1.220380 87.500000 53.142857 16.091953  3.468208
  rnn         1