In [3]:
# %pip (not !pip) installs into the environment of the running kernel.
%pip install matplotlib

[0m
[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python -m pip install --upgrade pip


In [3]:
import os
import matplotlib.pyplot as plt

# Path to the results folder
results_dir = "results"
output_dir = "plots"
os.makedirs(output_dir, exist_ok=True)

# Frame sample frequencies
frame_freqs = [5, 10, 20, 40]

# Model folders (compared case-insensitively) that are left out of the plots
excluded_models = {"mistral", "phi"}


def parse_results_file(file_path):
    """Parse one results file into a ``{metric_name: value}`` dict.

    Blank lines and lines starting with ``num_samples`` are skipped. For every
    other line, the text before the first ":" is the metric name, and the
    value is whatever follows the first "=" in the first comma-separated field
    after it. Lines that do not fit this shape are reported and skipped.
    If a metric name appears more than once, the last occurrence wins.
    """
    parsed = {}
    with open(file_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("num_samples"):
                continue
            try:
                metric_name, rest = line.split(":", 1)
                avg_str = rest.split(",")[0].split("=")[1].strip()
                parsed[metric_name] = float(avg_str)
            except (ValueError, IndexError) as e:
                # ValueError: no ":" in the line or a non-numeric value;
                # IndexError: no "=" in the first field.
                print(f"Error parsing line: {line} -> {e}")
    return parsed


# Structure: metrics[metric_name][model_name] = list of values aligned with
# frame_freqs (same length, same order). A value that could not be loaded is
# stored as NaN so the remaining values keep their correct x position.
metrics = {}

# Iterate through model folders (sorted: os.listdir order is arbitrary)
for model_name in sorted(os.listdir(results_dir)):
    model_path = os.path.join(results_dir, model_name)
    if not os.path.isdir(model_path):
        continue
    # Skip undesired models
    if model_name.lower() in excluded_models:
        continue

    # per_freq[freq] = {metric_name: value} for every file that exists
    per_freq = {}
    for freq in frame_freqs:
        file_path = os.path.join(model_path, f"{freq}_results.txt")
        if not os.path.isfile(file_path):
            print(f"Warning: {file_path} not found!")
            continue
        per_freq[freq] = parse_results_file(file_path)

    # Metric names in first-seen order across this model's files
    metric_names = []
    for freq in frame_freqs:
        for metric_name in per_freq.get(freq, {}):
            if metric_name not in metric_names:
                metric_names.append(metric_name)

    # Store the metrics, keyed by frequency rather than by append position
    for metric_name in metric_names:
        values = [
            per_freq.get(freq, {}).get(metric_name, float("nan"))
            for freq in frame_freqs
        ]
        metrics.setdefault(metric_name, {})[model_name] = values

# Plot each metric
for metric_name, model_data in metrics.items():
    plt.figure(figsize=(8, 6))
    for model_name, values in model_data.items():
        # NaN is the only value that is not equal to itself
        missing = [freq for freq, v in zip(frame_freqs, values) if v != v]
        if missing:
            print(
                f"Warning: model {model_name} missing values for metric "
                f"{metric_name} at frame frequencies {missing}"
            )
        # NaN entries leave a gap in the line instead of shifting the points
        plt.plot(frame_freqs, values, marker='o', label=model_name)

    plt.title(metric_name)
    plt.xlabel("Frame Sample Frequency")
    plt.ylabel(metric_name)
    plt.xticks(frame_freqs)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    # Save figure
    save_path = os.path.join(output_dir, f"{metric_name}.png")
    plt.savefig(save_path)
    plt.close()
    print(f"Saved plot: {save_path}")

print("All plots generated in folder:", output_dir)


Saved plot: plots/PBERT.png
Saved plot: plots/RBERT.png
Saved plot: plots/FBERT.png
Saved plot: plots/SBERTSim.png
Saved plot: plots/TemporalCoherence_NSP_true.png
Saved plot: plots/TemporalCoherence_NSP_shuffled.png
Saved plot: plots/TemporalCoherence_NSP_delta.png
Saved plot: plots/TemporalAlignment_DTW.png
Saved plot: plots/TemporalContradictionRate_NLI.png
Saved plot: plots/BLEU4.png
Saved plot: plots/METEOR.png
Saved plot: plots/CIDEr.png
All plots generated in folder: plots


In [4]:
import os

# Folder containing the plots (same level as this script)
plots_dir = "plots"

# Make sure the folder exists. Raise instead of calling exit(): inside a
# notebook, exit() asks the kernel to shut down rather than simply stopping
# this cell, whereas an exception aborts the cell and keeps the kernel alive.
if not os.path.isdir(plots_dir):
    raise FileNotFoundError(f"Folder '{plots_dir}' does not exist!")

# List all files ending with .png
# (os.listdir returns entries in arbitrary order, so the order below may
# differ between runs or machines.)
plot_files = [f for f in os.listdir(plots_dir) if f.endswith(".png")]

# Strip the extension to get the bare plot names
plot_names = [os.path.splitext(f)[0] for f in plot_files]

# Print the names
print("Graphs found:")
for name in plot_names:
    print(name)

# Also save the names to a txt file, one per line
output_file = "plot_names.txt"
with open(output_file, "w") as f:
    for name in plot_names:
        f.write(name + "\n")

print(f"\nPlot names saved to {output_file}")


Graphs found:
CIDEr
METEOR
BLEU4
TemporalContradictionRate_NLI
TemporalAlignment_DTW
TemporalCoherence_NSP_delta
TemporalCoherence_NSP_shuffled
TemporalCoherence_NSP_true
SBERTSim
FBERT
RBERT
PBERT

Plot names saved to plot_names.txt


In [7]:
# %pip (not !pip) installs into the environment of the running kernel.
%pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pandas>=1.2 (from seaborn)
  Downloading pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting pytz>=2020.1 (from pandas>=1.2->seaborn)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas>=1.2->seaborn)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Downloading pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m392.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas, seaborn
Successfully installed pandas-2.3.1 pytz-2025.2 seaborn-0.13.2 tzdata-2025.2
[0m
[1m[[0m[34

In [12]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

results_dir = "results"
output_dir = "graphs"
os.makedirs(output_dir, exist_ok=True)

# Hardcoded metrics, keyed as hardcoded_metrics[metric][model][freq] = value.
# They are merged in AFTER the txt files are read, so for these
# metric/model/frequency combinations they take precedence over any value
# parsed from disk.
hardcoded_metrics = {
    "BLEU4": {
        "GPT-4o":   {5: 4.31, 10: 4.73, 20: 4.69, 40: 4.52},
        "VLLaMA-3": {5: 3.60, 10: 4.10, 20: 4.32, 40: 4.79}
    },
    "METEOR": {
        "GPT-4o":   {5: 27.58, 10: 28.47, 20: 27.91, 40: 28.07},
        "VLLaMA-3": {5: 22.48, 10: 22.71, 20: 22.28, 40: 21.96}
    },
    "CIDEr": {
        "GPT-4o":   {5: 0.447, 10: 0.483, 20: 0.492, 40: 0.479},
        "VLLaMA-3": {5: 0.449, 10: 0.497, 20: 0.520, 40: 0.585}
    }
}

# Model folders (compared case-insensitively) that are left out of the plots
excluded_models = {"mistral", "phi"}

# Folder name (lower-cased) -> key used in `metrics`; these keys match the
# model names used in hardcoded_metrics so both sources land on the same line.
folder_to_model = {"gpt": "GPT-4o", "llama3": "VLLaMA-3"}

# Collect all frame frequencies dynamically
frame_freqs_set = set()
metrics = defaultdict(lambda: defaultdict(dict))  # metrics[metric_name][model_name][freq] = value

# Load existing txt data (sorted: os.listdir order is arbitrary, and the
# insertion order here decides the legend order in the plots)
for folder_name in sorted(os.listdir(results_dir)):
    model_path = os.path.join(results_dir, folder_name)
    if not os.path.isdir(model_path):
        continue
    if folder_name.lower() in excluded_models:
        continue
    model_name = folder_to_model.get(folder_name.lower(), folder_name)

    for file_name in sorted(os.listdir(model_path)):
        if not file_name.endswith("_results.txt"):
            continue
        # The frame frequency is the integer prefix of "<freq>_results.txt"
        freq_str = file_name.split("_")[0]
        try:
            freq = int(freq_str)
        except ValueError:
            continue
        frame_freqs_set.add(freq)
        file_path = os.path.join(model_path, file_name)

        with open(file_path, "r") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("num_samples"):
                    continue
                try:
                    # Metric name is the text before the first ":"; the value
                    # follows the first "=" in the first comma-separated field.
                    metric_name, rest = line.split(":", 1)
                    avg_str = rest.split(",")[0].split("=")[1].strip()
                    avg_value = float(avg_str)
                    metrics[metric_name][model_name][freq] = avg_value
                except (ValueError, IndexError) as e:
                    # ValueError: no ":" or a non-numeric value;
                    # IndexError: no "=" in the first field.
                    print(f"Error parsing line: {line} -> {e}")

# Merge hardcoded metrics (overrides parsed values for the same key)
for metric_name, model_data in hardcoded_metrics.items():
    for model_name, freq_data in model_data.items():
        for freq, value in freq_data.items():
            metrics[metric_name][model_name][freq] = value
            frame_freqs_set.add(freq)

# Sort frame frequencies
frame_freqs = sorted(frame_freqs_set)

# Seaborn styling (set_theme is the preferred name for the older sns.set alias)
sns.set_theme(style="whitegrid")


def display_name_for(model_name):
    """Return the legend label for a model key ("qwen" is shown as "DynaStride")."""
    return "DynaStride" if model_name.lower() == "qwen" else model_name


# Determine display names for color mapping
all_display_names = sorted(
    {display_name_for(m) for md in metrics.values() for m in md.keys()}
)

# Assign colors consistently: one fixed color per display name across all plots
palette = sns.color_palette("tab10", n_colors=len(all_display_names))
color_map = {name: palette[i] for i, name in enumerate(all_display_names)}

# Plot each metric
for metric_name, model_data in metrics.items():
    plt.figure(figsize=(8, 6))
    for model_name, freq_values in model_data.items():
        # NaN marks a frequency with no value; it leaves a gap in the line
        y_values = [freq_values.get(freq, float("nan")) for freq in frame_freqs]

        # Final display name
        display_name = display_name_for(model_name)

        plt.plot(frame_freqs, y_values, marker='o', label=display_name, color=color_map[display_name])

    plt.title(metric_name, fontsize=14)
    plt.xlabel("Frame Sample Frequency", fontsize=12)
    plt.ylabel(metric_name, fontsize=12)
    plt.xticks(frame_freqs)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(frameon=True)
    plt.tight_layout()

    save_path = os.path.join(output_dir, f"{metric_name}.png")
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved plot: {save_path}")

print("All plots generated in folder:", output_dir)



Saved plot: graphs/PBERT.png
Saved plot: graphs/RBERT.png
Saved plot: graphs/FBERT.png
Saved plot: graphs/SBERTSim.png
Saved plot: graphs/TemporalCoherence_NSP_true.png
Saved plot: graphs/TemporalCoherence_NSP_shuffled.png
Saved plot: graphs/TemporalCoherence_NSP_delta.png
Saved plot: graphs/TemporalAlignment_DTW.png
Saved plot: graphs/TemporalContradictionRate_NLI.png
Saved plot: graphs/BLEU4.png
Saved plot: graphs/METEOR.png
Saved plot: graphs/CIDEr.png
All plots generated in folder: graphs


In [5]:
# %pip (not !pip) installs into the environment of the running kernel.
%pip install matplotlib

[0m
[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python -m pip install --upgrade pip


In [6]:
# %pip (not !pip) installs into the environment of the running kernel.
%pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pandas>=1.2 (from seaborn)
  Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting pytz>=2020.1 (from pandas>=1.2->seaborn)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas>=1.2->seaborn)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m236.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas, seaborn
Successfully installed pandas-2.3.2 pytz-2025.2 seaborn-0.13.2 tzdata-2025.2
[0m
[1m[[0m[34