In [None]:
## Install the required libraries and packages ##
%%capture  # Suppresses the output of the installations

!pip install json  # Library for working with JSON data, which is used for storing and sending data
!pip install matplotlib  # Library useful for making graphs and charts
!pip install numpy  # Library for handling numerical computations, can also be useful for positioning bars on graphs
!pip install pandas  # Library for data manipulation and analysis, useful for handling datasets

In [None]:
## Import needed packages ##

import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
## Paths to the trainer_state files, which record the training process of each model ##
# NOTE(review): the leading backslash makes each path relative to the root of the
# current drive on Windows - confirm that this is where the files are stored.

path_to_females_under_18 = r'\trainer_state_female_under_18.json'
path_to_females_18_to_49 = r'\trainer_state_female_18_to_49.json'
path_to_females_over_49 = r'\trainer_state_female_over_49.json'
path_to_males_under_18 = r'\trainer_state_male_under_18.json'
path_to_males_18_to_49 = r'\trainer_state_male_18_to_49.json'
path_to_males_over_49 = r'\trainer_state_male_over_49.json'

# Backward-compatible alias: this variable was originally misnamed "18_to_19" although
# it points at the 18_to_49 file. It is kept so that code using the old name still works.
path_to_males_18_to_19 = path_to_males_18_to_49

In [None]:
## Create a table for the results of the training for the female_under_18 model ##

## Load the data from the JSON file ##
with open(path_to_females_under_18, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table ##
ax.set_title('Training results for the female_under_18 model', pad=150)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

In [None]:
## Visual representation of the training process and evaluation for the female_under_18 model ##

## Load the data from the JSON file ##
with open(path_to_females_under_18, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-score ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the female_under_18 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 500 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 500
    xticks = np.arange(first_tick, max(all_steps) + 500, 500)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the graph ##
plt.show()

In [None]:
## Create a table for the results of the training for the female_18_to_49 model ##

## Load the data from the JSON file ##
with open(path_to_females_18_to_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table ##
ax.set_title('Training results for the female_18_to_49 model', pad=150)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)


In [None]:
## Visual representation of the training process and evaluation for the female_18_to_49 model ##

## Load the data from the JSON file ##
with open(path_to_females_18_to_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-scores ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the female_18_to_49 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 500 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 500
    xticks = np.arange(first_tick, max(all_steps) + 500, 500)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the graph ##
plt.show()


In [None]:
## Create a table for the results of the training for the female_over_49 model ##

## Load the data from the JSON file ##
with open(path_to_females_over_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table ##
ax.set_title('Training results for the female_over_49 model', pad=130)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

In [None]:
## Visual representation of the training process and evaluation for the female_over_49 model ##

## Load the data from the JSON file ##
with open(path_to_females_over_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-scores ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the female_over_49 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 500 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 500
    xticks = np.arange(first_tick, max(all_steps) + 500, 500)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the plot ##
plt.show()

In [None]:
## Create a table for the results of the training for the male_under_18 model ##

## Load the data from the JSON file ##
with open(path_to_males_under_18, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table ##
ax.set_title('Training results for the male_under_18 model', pad=150)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

In [None]:
## Visual representation of the training process and evaluation for the male_under_18 model ##

## Load the data from the JSON file ##
with open(path_to_males_under_18, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-scores ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the male_under_18 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 500 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 500
    xticks = np.arange(first_tick, max(all_steps) + 500, 500)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the plot ##
plt.show()

In [None]:
## Create a table for the results of the training for the male_18_to_49 model ##

## Load the data from the JSON file ##
# The path variable is named "18_to_19" in this notebook but refers to the 18_to_49 model.
with open(path_to_males_18_to_19, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table (worded like the titles of the other models) ##
ax.set_title('Training results for the male_18_to_49 model', pad=140)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

In [None]:
## Visual representation of the training process and evaluation for the male_18_to_49 model ##

## Load the data from the JSON file ##
# The path variable is named "18_to_19" in this notebook but refers to the 18_to_49 model.
with open(path_to_males_18_to_19, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-scores ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the male_18_to_49 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 500 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 500
    xticks = np.arange(first_tick, max(all_steps) + 500, 500)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the plot ##
plt.show()

In [None]:
## Create a table for the results of the training for the male_over_49 model ##

## Load the data from the JSON file ##
with open(path_to_males_over_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the containers for training loss, validation loss, WER-scores and steps ##
train_loss_by_step = {}  # training loss keyed by the step it was logged at
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss_by_step[log['step']] = log['loss']
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Align the training loss with the evaluation steps ##
# Training-loss and evaluation entries are matched by step instead of by list position,
# so all columns keep the same length even if the two are logged at different intervals.
# None marks an evaluation step for which no training loss was logged.
train_loss = [train_loss_by_step.get(step) for step in steps]

## Create a pandas DataFrame with every value already formatted as text ##
# Formatting column by column avoids DataFrame.applymap, which is deprecated since pandas 2.1.
df = pd.DataFrame({
    'Step': [str(int(step)) for step in steps],
    'Training Loss': ['No log' if loss is None else f"{loss:.4f}" for loss in train_loss],
    'Validation Loss': [f"{loss:.4f}" for loss in val_loss],
    'WER': [f"{score:.4f}" for score in wer]
})

## Plot the pandas DataFrame as a table and remove axis ##
fig, ax = plt.subplots(figsize=(10, 4))
ax.axis('tight')
ax.axis('off')
the_table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')

## Add a title to the table (worded like the titles of the other models) ##
ax.set_title('Training results for the male_over_49 model', pad=130)

## Adjust the height and width of the cells in the table ##
for cell in the_table.properties()['children']:
    cell.set_height(0.10)
    cell.set_width(0.16)

## Change the font size ##
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

In [None]:
## Visual representation of the training process and evaluation for the male_over_49 model ##

## Load the data from the JSON file ##
with open(path_to_males_over_49, encoding='utf-8') as f:
    data = json.load(f)

## Initialize the lists for training loss, validation loss, and WER-scores ##
train_loss = []
train_steps = []  # steps at which the training loss was logged
val_loss = []
wer = []
steps = []  # steps at which the model was evaluated

## Extract the data ##
for log in data['log_history']:
    if 'loss' in log:
        train_loss.append(log['loss'])
        train_steps.append(log['step'])
    if 'eval_loss' in log and 'eval_wer' in log:
        val_loss.append(log['eval_loss'])
        wer.append(log['eval_wer'])
        steps.append(log['step'])

## Create the plot ##
# The training loss is drawn against its own steps: it is collected from different log
# entries than the evaluation metrics, so the two series need not have the same length.
plt.figure(figsize=(12, 8))
plt.plot(train_steps, train_loss, 'r', label='Training loss')
plt.plot(steps, val_loss, 'b', label='Validation loss')
plt.plot(steps, wer, 'g', label='WER-score')

## Add title and labels ##
plt.title('Training results for the male_over_49 model')
plt.xlabel('Steps')
plt.ylabel('Loss/WER')

## Define the x-ticks to be used (multiples of 300 covering every logged step) ##
all_steps = train_steps + steps
if all_steps:  # min()/max() would fail on an empty log history
    first_tick = min(all_steps) - min(all_steps) % 300
    xticks = np.arange(first_tick, max(all_steps) + 300, 300)
    plt.xticks(xticks, rotation=45)  # rotate the labels on the x-axis

plt.legend()

## Display the plot ##
plt.show()

In [None]:
## Merge the results from all the models into one chart for better visualization ##

## Function to plot the bar chart ##
def plot_bar_chart(json_files, model_names=None):
    """Plot the final training loss, validation loss and WER of several models as grouped bars.

    For every file the last logged 'loss', 'eval_loss' and 'eval_wer' values of its
    'log_history' are read and drawn as three adjacent bars.

    Args:
        json_files: Paths to the trainer_state JSON files, one per model.
        model_names: Optional x-axis label for every file, in the same order as
            `json_files`. Defaults to the six age/gender model names.

    Raises:
        ValueError: If the number of model names differs from the number of files.
        IndexError: If a log history contains no entry for one of the three metrics.
    """

    def last_logged(log_history, key, source):
        """Return the most recently logged value of `key`, or raise a descriptive IndexError."""
        for log in reversed(log_history):
            if key in log:
                return log[key]
        raise IndexError(f"No '{key}' entry found in the log history of {source!r}")

    json_files = list(json_files)
    if model_names is None:
        model_names = ['female_under_18', 'female_18_to_49', 'female_over_49',
                       'male_under_18', 'male_18_to_49', 'male_over_49']
    model_names = list(model_names)

    ## Every group of bars needs exactly one tick label ##
    if len(model_names) != len(json_files):
        raise ValueError(
            f"Got {len(json_files)} JSON files but {len(model_names)} model names; "
            "pass one name per file via `model_names`."
        )

    ## Log-history key, legend label and bar color of each metric ##
    metrics = [
        ('loss', 'Training Loss', 'red'),
        ('eval_loss', 'Validation Loss', 'blue'),
        ('eval_wer', 'WER-score', 'green'),
    ]

    ## Collect the final value of every metric for each model ##
    final_values = {key: [] for key, _, _ in metrics}
    for json_file in json_files:
        with open(json_file, 'r', encoding='utf-8') as f:
            log_history = json.load(f)['log_history']

        for key, _, _ in metrics:
            final_values[key].append(last_logged(log_history, key, json_file))

    ## Width of the bars ##
    width = 0.1

    ## Position of the bars on x-axis ##
    ind = np.arange(len(json_files))

    # plt.subplots leaves room for the tick labels, axis labels and title inside the figure.
    fig, ax = plt.subplots(figsize=(10, 5))

    ## Creating the bars: one series per metric, shifted by one bar width each ##
    for i, (key, label, color) in enumerate(metrics):
        ax.bar(ind + width * i, final_values[key], width, color=color, label=label)

    ## Adding the labels to the bars (ticks sit under the middle bar of each group) ##
    ax.set_xticks(ind + width)
    ax.set_xticklabels(model_names)
    ax.legend()

    ## Adding the labels to the axes ##
    ax.set_xlabel('Models')
    ax.set_ylabel('Loss/WER')
    ax.set_title('Comparison of Final Training Loss, Validation Loss and WER of Models')

    plt.show()

## Trainer-state files of the six models, listed in the order of their bars in the chart ##
json_files = [
    path_to_females_under_18,
    path_to_females_18_to_49,
    path_to_females_over_49,
    path_to_males_under_18,
    path_to_males_18_to_19,
    path_to_males_over_49,
]

## Draw the comparison chart for all models ##
plot_bar_chart(json_files)