# Final Training Plots Generator

This notebook was used to create various plots and graphs from the results of the final training. 

These provide additional insights to the tensorboard. 

## Notebook-Setup

In [1]:
import os
import pandas as pd
from tensorflow.python.summary.summary_iterator import summary_iterator
import datetime

## Extract Tensorboard Metrics

In [2]:
def extract_tensorboard_metrics(logdir):
    all_scalars = {}

    for root, dirs, files in os.walk(logdir):
        for file in files:
            if "events.out.tfevents" in file:
                event_file = os.path.join(root, file)
                for summary in summary_iterator(event_file):
                    for value in summary.summary.value:
                        if value.tag not in all_scalars:
                            all_scalars[value.tag] = []
                        all_scalars[value.tag].append({
                            "wall_time": summary.wall_time,
                            "step": summary.step,
                            "value": value.simple_value
                        })

    return all_scalars

def convert_to_dataframe(all_scalars):
    # Create a dictionary of DataFrames
    dataframes = {tag: pd.DataFrame(scalars) for tag, scalars in all_scalars.items()}
    
    # Combine all DataFrames into a single DataFrame with multi-level column indexing
    combined_df = pd.concat(dataframes, axis=1)
    
    # Flatten the multi-level column indexing
    combined_df.columns = ['_'.join(col).strip() for col in combined_df.columns.values]
    
    return combined_df

In [3]:
logdir = 'runs/May22_23-13-35_deep-learning_FinalTraining_v1/'
all_scalars = extract_tensorboard_metrics(logdir)
df = convert_to_dataframe(all_scalars)

# Display the DataFrame
df.head()

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Unnamed: 0,args/text_summary_wall_time,args/text_summary_step,args/text_summary_value,model_config/text_summary_wall_time,model_config/text_summary_step,model_config/text_summary_value,train/loss_wall_time,train/loss_step,train/loss_value,train/grad_norm_wall_time,...,train/train_samples_per_second_value,train/train_steps_per_second_wall_time,train/train_steps_per_second_step,train/train_steps_per_second_value,train/total_flos_wall_time,train/total_flos_step,train/total_flos_value,train/train_loss_wall_time,train/train_loss_step,train/train_loss_value
0,1716412000.0,0.0,0.0,1716412000.0,0.0,0.0,1716413000.0,50.0,0.3284,1716413000.0,...,0.088,1716544000.0,11538.0,0.088,1716544000.0,11538.0,8.008382e+17,1716544000.0,11538.0,0.029696
1,,,,,,,1716413000.0,100.0,0.3152,1716413000.0,...,,,,,,,,,,
2,,,,,,,1716414000.0,150.0,0.3282,1716414000.0,...,,,,,,,,,,
3,,,,,,,1716414000.0,200.0,0.2564,1716414000.0,...,,,,,,,,,,
4,,,,,,,1716415000.0,250.0,0.1827,1716415000.0,...,,,,,,,,,,


### Export Tensorboard Metrics to CSV

In [7]:
# export to csv
df.to_csv(f"finalTraining_v1_tbExport.csv", index=True)

The result csv is saved in:
[7_finalTrainingPlotsGenerator.ipynb](7_finalTrainingPlotsGenerator.ipynb)

In [5]:
# Pfad zur TensorBoard Event-Datei
event_file = "runs/May22_23-13-35_deep-learning_FinalTraining_v1/events.out.tfevents.1716412415.deep-learning.1381720.0"

# Funktion zur Extraktion der Trainingsdauer aus der Event-Datei
def get_training_duration(event_file):
    first_timestamp = None
    last_timestamp = None

    for summary in summary_iterator(event_file):
        if first_timestamp is None:
            first_timestamp = summary.wall_time
        last_timestamp = summary.wall_time

    if first_timestamp and last_timestamp:
        duration = last_timestamp - first_timestamp
        duration_hours = duration / 3600  # Umrechnung von Sekunden in Stunden
        return duration_hours
    else:
        return "Keine gültigen Ereignisse gefunden."

# Trainingsdauer ermitteln
training_duration = get_training_duration(event_file)
print(f"Das Training dauerte: {training_duration}")

Das Training dauerte: 36.431484684149424


## Plots

### Define Plot Layout

In [4]:
# Define Academic layout for Plotly charts
layout = {
    'plot_bgcolor': 'rgb(255, 255, 255)',
    'paper_bgcolor': 'rgb(255, 255, 255)',
    'font': {
        'family': 'Arial, sans-serif',
        'size': 12,
        'color': '#333333'
    },
    'xaxis': {
        'title': 'X Axis Title',
        'titlefont': {
            'size': 14,
            'color': '#333333'
        },
        'tickfont': {
            'size': 12,
            'color': '#333333'
        },
        'showgrid': True,
        'gridcolor': 'rgba(200, 200, 200, 0.5)',
        'zeroline': False,
    },
    'yaxis': {
        'title': 'Y Axis Title',
        'titlefont': {
            'size': 14,
            'color': '#333333'
        },
        'tickfont': {
            'size': 12,
            'color': '#333333'
        },
        'showgrid': True,
        'gridcolor': 'rgba(200, 200, 200, 0.5)',
        'zeroline': False,
    },
    'legend': {
        'font': {
            'size': 12,
            'color': '#333333'
        }
    }
}

In [None]:
# copy df to training_df for further processing
training_df = df

### Train and Eval Loss

In [10]:
import plotly.graph_objects as go

# Assuming 'training_df' is your dataframe
fig_loss = go.Figure()

# Plot train/loss_value
fig_loss.add_trace(go.Scatter(
    x=training_df['train/loss_step'], 
    y=training_df['train/loss_value'],
    mode='lines',
    name='Train Loss'
))

# Plot eval/loss_value if it exists in the dataframe
if 'eval/loss_value' in training_df.columns:
    fig_loss.add_trace(go.Scatter(
        x=training_df['train/loss_step'],  # Assuming the steps are the same for eval and train
        y=training_df['eval/loss_value'],
        mode='lines',
        name='Eval Loss'
    ))

fig_loss.update_layout(layout)
# Update layout
fig_loss.update_layout(
    title='Training and Evaluation Loss over Steps',
    xaxis_title='Steps',
    yaxis_title='Loss Value',
)

# Show plot
fig_loss.show()
# fig_loss.write_image("plotly/final_training/loss_800_400_5.png", width=800, height=400, scale=5)


### F1 Score

In [19]:
## FI PLOT ##

import plotly.graph_objects as go

# Create a new figure for F1 scores
fig_f1 = go.Figure()

# Plot F1 Macro score
if 'eval/f1 macro_step' in training_df.columns and 'eval/f1 macro_value' in training_df.columns:
    fig_f1.add_trace(go.Scatter(
        x=training_df['eval/f1 macro_step'],
        y=training_df['eval/f1 macro_value'],
        mode='lines',
        name='F1 Macro'
    ))

# Plot F1 Micro score
if 'eval/f1 micro_step' in training_df.columns and 'eval/f1 micro_value' in training_df.columns:
    fig_f1.add_trace(go.Scatter(
        x=training_df['eval/f1 micro_step'],
        y=training_df['eval/f1 micro_value'],
        mode='lines',
        name='F1 Micro'
    ))

# Plot F1 Weighted score
if 'eval/f1 weighted_step' in training_df.columns and 'eval/f1 weighted_value' in training_df.columns:
    fig_f1.add_trace(go.Scatter(
        x=training_df['eval/f1 weighted_step'],
        y=training_df['eval/f1 weighted_value'],
        mode='lines',
        name='F1 Weighted'
    ))

fig_f1.update_layout(layout)

# Update layout
fig_f1.update_layout(
    title='F1 Scores over Steps',
    xaxis_title='Steps',
    yaxis_title='F1 Score',
)

# Show plot
fig_f1.show()
# fig_f1.write_image("plotly/final_training/f1_800_400_5.png", width=800, height=400, scale=5)

### Learning Rate

In [20]:
## Learning Rate ##

import plotly.graph_objects as go

# Create a new figure for learning rate
fig_lr = go.Figure()

# Plot train/learning_rate_value
fig_lr.add_trace(go.Scatter(
    x=training_df['train/learning_rate_step'],
    y=training_df['train/learning_rate_value'],
    mode='lines',
    name='Learning Rate'
))
fig_lr.update_layout(layout)
# Update layout
fig_lr.update_layout(
    title='Learning Rate over Steps',
    xaxis_title='Steps',
    yaxis_title='Learning Rate',
)

# Show plot
fig_lr.show()
# fig_lr.write_image("plotly/final_training/lr_800_400_5.png", width=800, height=400, scale=5)


### Eval Loss

In [21]:
## EVAL LOSS ##
import plotly.graph_objects as go

# Create a new figure for evaluation loss over time
fig_eval_loss = go.Figure()

# Plot eval/loss_value
fig_eval_loss.add_trace(go.Scatter(
    x=training_df['eval/loss_step'],  # Wall time for x-axis
    y=training_df['eval/loss_value'],
    mode='lines',
    name='Eval Loss'
))

fig_eval_loss.update_layout(layout)
# Update layout
fig_eval_loss.update_layout(
    title='Evaluation Loss over Steps',
    xaxis_title='Steps',
    yaxis_title='Evaluation Loss Value',
)

# Show plot
fig_eval_loss.show()
# fig_eval_loss.write_image("plotly/final_training/eval_loss_800_400_5.png", width=800, height=400, scale=5)

### Eval BLEU

In [23]:
## EVAL BLEU ##
import plotly.graph_objects as go

# Create a new figure for evaluation loss over time
fig_bleu = go.Figure()

# Plot bleu_step/bleu_value
fig_bleu.add_trace(go.Scatter(
    x=training_df['eval/bleu_step'],  # Wall time for x-axis
    y=training_df['eval/bleu_value'],
    mode='lines',
    name='BLEU Score'
))

fig_bleu.update_layout(layout)
# Update layout
fig_bleu.update_layout(
    title='BLEU Score over Steps',
    xaxis_title='Steps',
    yaxis_title='BLEU Score',
)

# Show plot
fig_bleu.show()
# fig_bleu.write_image("plotly/final_training/bleu_800_400_5.png", width=800, height=400, scale=5)

### Length Metrics

In [15]:
import plotly.graph_objects as go

# Create a new figure for the evaluation metrics
fig_length_metrics = go.Figure()

# # Plot eval/length_ratio_value
# if 'eval/length_ratio_value' in training_df.columns:
#     fig_length_metrics.add_trace(go.Scatter(
#         x=training_df['eval/length_ratio_step'],
#         y=500000 + training_df['eval/length_ratio_value'],
#         mode='lines+markers',
#         name='Length Ratio'
#     ))

# Plot eval/translation_length_value
if 'eval/translation_length_value' in training_df.columns:
    fig_length_metrics.add_trace(go.Scatter(
        x=training_df['eval/translation_length_step'],
        y=training_df['eval/translation_length_value'],
        mode='lines',
        name='Generation Length'
    ))

# Plot eval/reference_length_value
if 'eval/reference_length_value' in training_df.columns:
    fig_length_metrics.add_trace(go.Scatter(
        x=training_df['eval/reference_length_step'],
        y=training_df['eval/reference_length_value'],
        mode='lines',
        name='Reference Length'
    ))

fig_length_metrics.update_layout(layout)
# Update layout
fig_length_metrics.update_layout(
    title='Evaluation Length Metrics over Steps',
    xaxis_title='Steps',
    yaxis_title='Values',
)

# Show plot
fig_length_metrics.show()
# fig_length_metrics.write_image("plotly/final_training/length_metrics_800_400_5.png", width=800, height=400, scale=5)

### Runtime

In [16]:
import plotly.graph_objects as go

# Create a new figure for eval/runtime metrics
fig_runtime = go.Figure()

# Plot eval/runtime_value
fig_runtime.add_trace(go.Scatter(
    x=training_df['eval/runtime_step'], 
    y=training_df['eval/runtime_value'],
    mode='lines',
    name='Eval Runtime'
))

fig_runtime.update_layout(layout)
# Update layout
fig_runtime.update_layout(
    title='Evaluation Runtime over Steps',
    xaxis_title='Steps',
    yaxis_title='Runtime',
)

# Show plot
fig_runtime.show()
# fig_runtime.write_image("plotly/final_training/runtime_800_400_5.png", width=800, height=400, scale=5)

### Perfomrmance

In [17]:
import plotly.graph_objects as go

# Create a new figure for evaluation performance metrics
fig_performance = go.Figure()

# Plot eval/samples_per_second_value
if 'eval/samples_per_second_value' in training_df.columns:
    fig_performance.add_trace(go.Scatter(
        x=training_df['eval/samples_per_second_step'],
        y=training_df['eval/samples_per_second_value'],
        mode='lines',
        name='Samples per Second'
    ))

# Plot eval/steps_per_second_value
if 'eval/steps_per_second_value' in training_df.columns:
    fig_performance.add_trace(go.Scatter(
        x=training_df['eval/steps_per_second_step'],
        y=training_df['eval/steps_per_second_value'],
        mode='lines',
        name='Steps per Second'
    ))

fig_performance.update_layout(layout)
# Update layout
fig_performance.update_layout(
    title='Evaluation Performance Metrics over Steps',
    xaxis_title='Steps',
    yaxis_title='Value',
)

# Show plot
fig_performance.show()
# fig_performance.write_image("plotly/final_training/performance_800_400_5.png", width=800, height=400, scale=5)


### Training Runtime

In [80]:
import plotly.graph_objects as go

# Create a new figure for training runtime
fig_runtime = go.Figure()

# Plot train runtime
if 'train/train_runtime_value' in training_df.columns:
    fig_runtime.add_trace(go.Scatter(
        x=training_df['train/train_runtime_step'],
        y=training_df['train/train_runtime_value'],
        mode='lines+markers',
        name='Training Runtime'
    ))

# Update layout
fig_runtime.update_layout(
    title='Training Runtime over Steps',
    xaxis_title='Steps',
    yaxis_title='Runtime (seconds)',
)

# Show plot
fig_runtime.show()


### Wall Time

In [18]:
import plotly.graph_objects as go

# Create a new figure for epoch wall time
fig_epoch = go.Figure()

# Plot train/epoch_wall_time
fig_epoch.add_trace(go.Scatter(
    x=training_df['train/epoch_step'],
    y=training_df['train/epoch_wall_time'],
    mode='lines+markers',
    name='Epoch Wall Time'
))

fig_epoch.update_layout(layout)
# Update layout
fig_epoch.update_layout(
    title='Epoch Wall Time over Steps',
    xaxis_title='Epoch Step',
    yaxis_title='Epoch Wall Time',
)

# Show plot
fig_epoch.show()
# fig_epoch.write_image("plotly/final_training/epoch_800_400_5.png", width=800, height=400, scale=5)

### Training Speed

In [81]:
import plotly.graph_objects as go

# Create a new figure for training samples and steps per second
fig_train_speed = go.Figure()

# Plot train_samples_per_second
fig_train_speed.add_trace(go.Scatter(
    x=training_df['train/train_samples_per_second_step'],
    y=training_df['train/train_samples_per_second_value'],
    mode='lines+markers',
    name='Samples per Second'
))

# Plot train_steps_per_second
fig_train_speed.add_trace(go.Scatter(
    x=training_df['train/train_steps_per_second_step'],
    y=training_df['train/train_steps_per_second_value'],
    mode='lines+markers',
    name='Steps per Second'
))

# Update layout
fig_train_speed.update_layout(
    title='Training Speed Metrics over Steps',
    xaxis_title='Steps',
    yaxis_title='Speed',
)

# Show plot
fig_train_speed.show()


### Total FLOPS

In [82]:
import plotly.graph_objects as go

# Create a new figure for FLOPS
fig_flops = go.Figure()

# Plot train/total_flos_value
fig_flops.add_trace(go.Scatter(
    x=training_df['train/total_flos_step'],
    y=training_df['train/total_flos_value'],
    mode='lines+markers',
    name='Total FLOPS'
))

# Update layout
fig_flops.update_layout(
    title='Total FLOPS over Steps',
    xaxis_title='Steps',
    yaxis_title='Total FLOPS Value',
)

# Show plot
fig_flops.show()
