In [None]:
%pip install hydra-core
%pip install pandas

custom_path = input("Enter the path to your seed folder: ").strip()

In [None]:
# Project-local helper module; every metric lookup in this notebook goes through it.
from mllm.training import produce_training_stats
# Load the data for the seed folder held in custom_path. Later cells call len()
# on the result and read training_data[0].keys(), so it is used as an indexable
# collection of dict-like entries.
training_data = produce_training_stats.get_iterations_data(custom_path)

# Analysis

In [None]:
import matplotlib.pyplot as plt
from statistics import mean
import numpy as np

In [None]:
# Show a sample of the available metric paths so the keys used below can be checked.
paths = produce_training_stats.get_metric_paths(training_data)
print("Example metric paths:", paths[:10])

value_mb = produce_training_stats.get_metric_iteration_list(training_data, ["loss_mb_total", "value_mb_total"])
# Entries that are None are skipped. Plot each mean against its original index in
# the list so a skipped entry leaves a gap instead of shifting later points left.
# NOTE(review): assumes the list holds one entry per iteration, as the title implies.
value_mb_iters = [i for i, c in enumerate(value_mb) if c is not None]
value_mb_means = [np.mean(c) for c in value_mb if c is not None]
plt.plot(value_mb_iters, value_mb_means)
plt.xlabel("Iteration")
plt.ylabel("Mean loss")
plt.title("Value MB loss (mean) per iteration")

In [None]:
# A cell only echoes its final expression, so the length has to be printed
# explicitly; otherwise it is computed and silently discarded.
print("Entries in training_data:", len(training_data))
# Keys available on the first entry (echoed as the cell's output).
training_data[0].keys()

In [None]:
from statistics import mean
kl_terms = produce_training_stats.get_metric_iteration_list(training_data, ["mb_kl_loss_terms"])
# Entries that are None are skipped. Plot each mean against its original index in
# the list so a skipped entry leaves a gap instead of shifting later points left.
# NOTE(review): assumes the list holds one entry per iteration, as the title implies.
kl_iters = [i for i, c in enumerate(kl_terms) if c is not None]
kl_means = [np.mean(c) for c in kl_terms if c is not None]
plt.plot(kl_iters, kl_means)
plt.xlabel("Iteration")
plt.ylabel("Mean KL loss term")
plt.title("KL loss terms (mean) per iteration")

In [None]:
value_mb = produce_training_stats.get_metric_iteration_list(training_data, ["loss_mb_total", "value_mb_total"])
# Entries that are None are skipped. Plot each mean against its original index in
# the list so a skipped entry leaves a gap instead of shifting later points left.
# NOTE(review): assumes the list holds one entry per iteration, as the title implies.
value_iters = [i for i, c in enumerate(value_mb) if c is not None]
value_means = [np.mean(c) for c in value_mb if c is not None]
plt.plot(value_iters, value_means)
plt.xlabel("Iteration")
plt.ylabel("Mean loss")
plt.title("Policy Gradient Value Loss (mean) per iteration")

# Get AdAlign Stats (Warning: Might be Affected by Padding)
If the number of timesteps is not constant across your trajectories, the return tensors are padded
with zeros, and the statistics below will be skewed by those padding values.

In [None]:
# Updated metric names per new tally logging
# Every vector below is requested over the same index range covering training_data.
iteration_ids = range(len(training_data))
fetch_vector = produce_training_stats.get_single_metric_vector

a1 = fetch_vector(training_data, ["regular_advantages"], iteration_ids)
a2 = fetch_vector(training_data, ["regular_advantages_other"], iteration_ids)
reg_scores = fetch_vector(training_data, ["raw_advantage_alignment_weights"], iteration_ids)
op_terms = fetch_vector(training_data, ["ad_align_opp_shaping_terms"], iteration_ids)
aa_scores = fetch_vector(training_data, ["final_advantage_alignment_credits"], iteration_ids)

n_bins = 25
# One histogram panel per vector, left to right, all sharing the y-axis.
hist_panels = [
    (reg_scores, 'Raw Weights'),
    (op_terms, 'OP term'),
    (aa_scores, 'AdAlign credit'),
    (a1, 'a1'),
    (a2, 'a2'),
]
fig, axs = plt.subplots(1, len(hist_panels), sharey=True, tight_layout=True)
for panel_ax, (panel_values, panel_label) in zip(axs, hist_panels):
    panel_ax.hist(panel_values, bins=n_bins)
    panel_ax.set_xlabel(panel_label)
plt.show()

In [None]:
# Summary statistics for the vectors built in the histogram cell, printed in a
# fixed order as (label, reducer, values) rows.
summary_rows = [
    ("Weights Mean: ", np.mean, reg_scores),
    ("Weights STD: ", np.std, reg_scores),
    ("OP Mean: ", np.mean, op_terms),
    ("OP STD: ", np.std, op_terms),
    ("AA Mean: ", np.mean, aa_scores),
    ("AA STD: ", np.std, aa_scores),
    ("AA min: ", np.min, aa_scores),
    ("AA max: ", np.max, aa_scores),
]
for row_label, reducer, row_values in summary_rows:
    print(row_label, reducer(row_values))

In [None]:
# np.corrcoef needs both inputs to have the same length. Truncate to the length
# shared by all three vectors; using reg_scores.size alone would still fail
# whenever op_terms or aa_scores is the shorter one.
# NOTE(review): truncation assumes the vectors line up element-for-element from
# the start; confirm if their lengths ever differ.
N = min(reg_scores.size, op_terms.size, aa_scores.size)
print("Corr. Coeff Reg & AA", np.corrcoef(reg_scores[:N], aa_scores[:N])[0,1])
print("Corr. Coeff OP & AA", np.corrcoef(op_terms[:N], aa_scores[:N])[0,1])