In [None]:
import os
import json
from collections import Counter, defaultdict

os.chdir("..")
import MatterSim
import numpy as np
import cv2
import matplotlib as mpl
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

# Show the working directory so the relative CSV / figure paths used by the
# cells below can be sanity-checked.
print(os.getcwd())

# Global matplotlib defaults: font type 42 (TrueType) for PDF and PS output,
# and 300 dpi for both on-screen figures and saved files.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'figure.dpi': 300,
    'savefig.dpi': 300,
})


## Validation Accuracy

In [None]:
# Validation accuracy per epoch, one curve per model.
# `action_space` selects both the wandb export folder and the output folder;
# it is defined here and reused by the cells further down.
action_space = "panoramic"

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-full-validation-accuracy.csv",
    "Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-freeze-validation-accuracy.csv",
    "Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-freeze-validation-accuracy.csv"
}

# Collect one tidy frame per model and concatenate once at the end
# (avoids re-copying the accumulated frame on every iteration).
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the metric column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "validation accuracy" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'validation accuracy' column found in {path}")
    val_acc_col = candidates[0]

    frames.append(
        temp_df[["Step", val_acc_col]]
        .rename(columns={val_acc_col: "Validation Accuracy"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes one logged row per epoch, with the first row taken before training.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="Validation Accuracy", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["Validation Accuracy"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Accuracy", fontsize=32)
plt.xlim(left=0, right=6)
plt.ylim(y_start, 1)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"./figures/plots/{action_space}/{action_space}-validation-accuracy.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Validation Loss

In [None]:
# Validation loss per epoch, one curve per model.
# Relies on `action_space` set in the validation-accuracy cell.

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-full-validation-loss.csv",
    "Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-freeze-validation-loss.csv",
    "Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-freeze-validation-loss.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the validation-loss column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "validation_loss" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'validation_loss' column found in {path}")
    loss_col = candidates[0]

    frames.append(
        temp_df[["Step", loss_col]]
        .rename(columns={loss_col: "validation_loss"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes one logged row per epoch, with the first row taken before training.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(
    data=combined_df,
    x="Epoch",
    y="validation_loss",
    hue="Model",
    palette=palette,
    linewidth=4.0
)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["validation_loss"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Loss", fontsize=32)
plt.xlim(left=0, right=6)
plt.ylim(y_start, 4.5)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"./figures/plots/{action_space}/{action_space}-validation-loss.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Validation Success Rate

In [None]:
# Validation success rate per epoch, one curve per model.
# Relies on `action_space` set in the validation-accuracy cell.

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-full-validation-SR.csv",
    "Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-freeze-validation-SR.csv",
    "Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-freeze-validation-SR.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the success-rate column. The spelling "sucess" is the key the CSV
    # lookup depends on, so it is kept verbatim. Columns containing "__" are
    # skipped (presumably wandb's __MIN/__MAX companions -- confirm).
    candidates = [col for col in temp_df.columns if "validation sucess rate" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'validation sucess rate' column found in {path}")
    sr_col = candidates[0]

    frames.append(
        temp_df[["Step", sr_col]]
        .rename(columns={sr_col: "validation sucess rate"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes one logged row per epoch, with the first row taken before training.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="validation sucess rate", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["validation sucess rate"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Success Rate", fontsize=32)
plt.xlim(left=0, right=6)
plt.ylim(y_start, 0.18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
# NOTE(review): the file name spells "succcess" with three c's. It is left
# unchanged because documents that include the figure may reference it.
plt.savefig(f"./figures/plots/{action_space}/{action_space}-validation-succcess-rate.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Train Loss

In [None]:
# Training loss per epoch, one curve per model.
# Relies on `action_space` set in the validation-accuracy cell.

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-full-train-loss.csv",
    "Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-freeze-train-loss.csv",
    "Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-freeze-train-loss.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the training-loss column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "training_loss" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'training_loss' column found in {path}")
    loss_col = candidates[0]

    frames.append(
        temp_df[["Step", loss_col]]
        .rename(columns={loss_col: "training_loss"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes exactly one logged row per epoch -- TODO confirm for the training-loss export.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="training_loss", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["training_loss"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Loss", fontsize=32)
plt.xlim(left=0, right=5)
plt.ylim(y_start, 1.5)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"./figures/plots/{action_space}/{action_space}-train-loss.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Stop Recall

In [None]:
# Per-epoch metric for the "stop" action, one curve per model.
# Relies on `action_space` set in the validation-accuracy cell.
# NOTE(review): the plotted column is named "Stop precision" while the section
# header and the y-axis label say "Recall" -- confirm which metric wandb logged.

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-full-stop.csv",
    "Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-freeze-stop.csv",
    "Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-freeze-stop.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the stop-metric column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "Stop precision" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'Stop precision' column found in {path}")
    stop_col = candidates[0]

    frames.append(
        temp_df[["Step", stop_col]]
        .rename(columns={stop_col: "Stop precision"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes exactly one logged row per epoch -- TODO confirm for this export.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="Stop precision", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["Stop precision"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Recall", fontsize=32)
plt.xlim(left=0, right=5)
plt.ylim(y_start, 1)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"./figures/plots/{action_space}/{action_space}-validation-stop.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Left Recall

In [None]:
# Per-epoch metric for the "left" action, one curve per model.
# This cell always reads the low-level exports, so the figure is written to
# the low-level plot folder as well, independent of the current
# `action_space` (previously it landed under ./figures/plots/{action_space}/).
# NOTE(review): the plotted column is named "Left precision" while the section
# header and the y-axis label say "Recall" -- confirm which metric wandb logged.
plot_dir = "./figures/plots/low-level"
os.makedirs(plot_dir, exist_ok=True)

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": "./wandb-csv/low-level/qwen2-low-level-full-left.csv",
    "Qwen2-VL-frozen": "./wandb-csv/low-level/qwen2-low-level-freeze-left.csv",
    "Qwen2.5-VL-frozen": "./wandb-csv/low-level/qwen2.5-low-level-freeze-left.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the left-metric column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "Left precision" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'Left precision' column found in {path}")
    left_col = candidates[0]

    frames.append(
        temp_df[["Step", left_col]]
        .rename(columns={left_col: "Left precision"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes exactly one logged row per epoch -- TODO confirm for this export.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="Left precision", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["Left precision"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Recall", fontsize=32)
plt.xlim(left=0, right=5)
plt.ylim(y_start, 1)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"{plot_dir}/low-level-validation-left.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Right Recall

In [None]:
# Per-epoch metric for the "right" action, one curve per model.
# This cell always reads the low-level exports, so the figure is written to
# the low-level plot folder as well, independent of the current
# `action_space` (previously it landed under ./figures/plots/{action_space}/).
# NOTE(review): the plotted column is named "Right precision" while the section
# header and the y-axis label say "Recall" -- confirm which metric wandb logged.
plot_dir = "./figures/plots/low-level"
os.makedirs(plot_dir, exist_ok=True)

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": "./wandb-csv/low-level/qwen2-low-level-full-right.csv",
    "Qwen2-VL-frozen": "./wandb-csv/low-level/qwen2-low-level-freeze-right.csv",
    "Qwen2.5-VL-frozen": "./wandb-csv/low-level/qwen2.5-low-level-freeze-right.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the right-metric column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "Right precision" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'Right precision' column found in {path}")
    right_col = candidates[0]

    frames.append(
        temp_df[["Step", right_col]]
        .rename(columns={right_col: "Right precision"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes exactly one logged row per epoch -- TODO confirm for this export.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="Right precision", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["Right precision"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Recall", fontsize=32)
plt.xlim(left=0, right=5)
plt.ylim(y_start, 1)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"{plot_dir}/low-level-validation-right.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Move Recall

In [None]:
# Per-epoch metric for the "move" action, one curve per model.
# This cell always reads the low-level exports, so the figure is written to
# the low-level plot folder as well, independent of the current
# `action_space` (previously it landed under ./figures/plots/{action_space}/).
# NOTE(review): the plotted column is named "Move precision" while the section
# header and the y-axis label say "Recall" -- confirm which metric wandb logged.
plot_dir = "./figures/plots/low-level"
os.makedirs(plot_dir, exist_ok=True)

# wandb CSV export per model, keyed by the label shown in the legend.
file_paths = {
    "Qwen2-VL-full": "./wandb-csv/low-level/qwen2-low-level-full-move.csv",
    "Qwen2-VL-frozen": "./wandb-csv/low-level/qwen2-low-level-freeze-move.csv",
    "Qwen2.5-VL-frozen": "./wandb-csv/low-level/qwen2.5-low-level-freeze-move.csv"
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the move-metric column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "Move precision" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'Move precision' column found in {path}")
    move_col = candidates[0]

    frames.append(
        temp_df[["Step", move_col]]
        .rename(columns={move_col: "Move precision"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

# Number the rows of each model 0, 1, 2, ... and use that as the epoch.
# Assumes exactly one logged row per epoch -- TODO confirm for this export.
combined_df["Epoch"] = combined_df.groupby("Model").cumcount()

sns.set_theme(style="whitegrid", context="paper")
palette = sns.color_palette("colorblind", n_colors=combined_df["Model"].nunique())

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Epoch", y="Move precision", hue="Model", palette=palette, linewidth=4.0)

# Start the y-axis slightly below the smallest value (never below 0) to focus on the data.
y_min = combined_df["Move precision"].min()
y_start = max(0, y_min - 0.05)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
plt.xlabel("Epoch", fontsize=32)
plt.ylabel("Recall", fontsize=32)
plt.xlim(left=0, right=5)
plt.ylim(y_start, 1)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"{plot_dir}/low-level-validation-move.pdf", bbox_inches="tight", dpi=700)
plt.show()

## Episode Loss

In [None]:
# Episode loss against the logged wandb step (not epoch), one curve per model.
# The four settings below select the input CSV and name the output PDF.
action_space = "panoramic"
model = "qwen2"
mode = "full"
type_loss = "episode"

# wandb CSV export per model, keyed by the label shown in the legend.
# Only the fully fine-tuned model is plotted; the other entries are disabled.
file_paths = {
    "Qwen2-VL-full": f"./wandb-csv/{action_space}/qwen2-{action_space}-{mode}-{type_loss}-loss.csv",
    #"Qwen2-VL-frozen": f"./wandb-csv/{action_space}/qwen2-{action_space}-{mode}-{type_loss}-loss.csv",
    #"Qwen2.5-VL-frozen": f"./wandb-csv/{action_space}/qwen2.5-{action_space}-{mode}-{type_loss}-loss.csv"
}

# Fixed colour per model label; covers the disabled models too.
palette = {
    "Qwen2-VL-full": "#1f77b4",           # blue
    "Qwen2-VL-frozen": "#ff7f0e",   # orange
    "Qwen2.5-VL-frozen": "#2ca02c"  # green
}

# Collect one tidy frame per model and concatenate once at the end.
frames = []
for model_name, path in file_paths.items():
    temp_df = pd.read_csv(path)

    # Pick the episode-loss column; columns containing "__" are skipped
    # (presumably wandb's __MIN/__MAX companions -- confirm against the export).
    candidates = [col for col in temp_df.columns if "episode_loss" in col and "__" not in col]
    if not candidates:
        raise ValueError(f"No 'episode_loss' column found in {path}")
    loss_col = candidates[0]

    frames.append(
        temp_df[["Step", loss_col]]
        .rename(columns={loss_col: "episode_loss"})
        .assign(Model=model_name)
    )

combined_df = pd.concat(frames, ignore_index=True)

sns.set_theme(style="whitegrid", context="paper")

plt.figure(figsize=(10, 6))
sns.lineplot(data=combined_df, x="Step", y="episode_loss", hue="Model", palette=palette, linewidth=4.0)

# Formatting (large fonts: the figure is meant to be embedded at reduced size).
# The y-axis is left on matplotlib's automatic limits.
plt.xlabel("Step", fontsize=32)
plt.ylabel("Loss", fontsize=32)
# Use the largest step over all rows: the last row only belongs to the last
# model loaded, so it would clip longer runs once several models are plotted.
plt.xlim(left=0, right=combined_df["Step"].max())
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title="Model", fontsize=17, title_fontsize=17)
plt.grid(True)
plt.tight_layout()
plt.savefig(f"./figures/plots/{action_space}/{model}-{mode}-{action_space}-{type_loss}-loss.pdf", bbox_inches="tight", dpi=700)
plt.show()