In [None]:
import json
from pathlib import Path
from tqdm import tqdm
import math

# Input JSONL file to process. This is an absolute, machine-specific path:
# point it at your own copy before running. The "_processed.jsonl" output path
# used below is built from the part of this string before its first ".".
file_path = "/project/flame/asetlur/pipeline-rl/0.jsonl"


def process_text(text):
    """
    Return the user prompt embedded in a chat-formatted string.

    The segment following the first "<|im_start|>user\\n" marker (up to the
    next such marker, if there is one) is cut at the first
    "<|im_end|>\\n<|im_start|>assistant" marker. When the assistant marker is
    absent the whole segment is returned.

    Raises:
        IndexError: If the user marker does not occur in ``text``.
    """
    user_marker = "<|im_start|>user\n"
    assistant_marker = "<|im_end|>\n<|im_start|>assistant"

    after_user = text.split(user_marker)[1]
    prompt, _, _ = after_user.partition(assistant_marker)
    return prompt

def extract_text_and_reward_jsonl(input_path, output_path, max_version_gap=1024):
    """
    Stream a JSONL file of entry groups and write one summary record per row.

    Every non-blank input line is parsed as JSON and iterated as a list of
    entries. Each entry must provide a "text" field (passed through
    ``process_text`` to obtain the prompt) and a "metadata" mapping with a
    "model_version" key; its "reward" field is collected as-is (``None`` when
    the key is missing). For each row, one line is written to ``output_path``:

        {"text": <prompt>, "rewards": [<reward>, ...], "model_version": <version>}

    where ``model_version`` is the first non-null version seen in the row.

    Blank lines are ignored. Lines that are not valid JSON are skipped; the
    number of skipped lines is printed once after processing.

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSONL file to write (truncated if it exists).
        max_version_gap: Largest allowed absolute difference between the row's
            reference model_version and that of any later entry in the row.

    Raises:
        AssertionError: If a row has no entries, its entries yield different
            prompts, a model_version cannot be compared with the row's
            reference version or differs from it by more than
            ``max_version_gap``, or no model_version was found in the row.
    """
    skipped_lines = 0

    with open(input_path, "r", encoding="utf-8") as fin, \
         open(output_path, "w", encoding="utf-8") as fout:

        for line_no, line in enumerate(tqdm(fin, desc="Processing rows"), start=1):
            line = line.strip()
            if not line:
                continue

            # Each row is a list of entries
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort: skip the bad line, but keep count so it is reported
                skipped_lines += 1
                continue

            row_text = None
            row_model_version = None
            rewards = []
            for entry in row:
                # A missing "text" or "metadata" field is not tolerated: it
                # surfaces as an error from process_text / the subscript below.
                text = process_text(entry.get("text"))
                model_version = entry.get("metadata")["model_version"]

                # All entries of a row must share the same prompt
                if row_text is None:
                    row_text = text
                elif text != row_text:
                    raise AssertionError(
                        f"line {line_no}: entries of the same row have different prompts"
                    )

                # All entries of a row must come from nearby model versions
                if row_model_version is None:
                    row_model_version = model_version
                else:
                    try:
                        within_gap = abs(row_model_version - model_version) <= max_version_gap
                    except TypeError:
                        # e.g. a null or non-numeric model_version
                        within_gap = False
                    if not within_gap:
                        raise AssertionError(
                            f"line {line_no}: model_version {model_version!r} is not within "
                            f"{max_version_gap} of {row_model_version!r}"
                        )

                rewards.append(entry.get("reward"))

            if not rewards:
                raise AssertionError(f"line {line_no}: row contains no entries")
            # rewards is non-empty here, so row_text was set by the first entry
            if row_model_version is None:
                raise AssertionError(f"line {line_no}: no model_version found in row")

            out_obj = {"text": row_text, "rewards": rewards, "model_version": row_model_version}
            fout.write(json.dumps(out_obj, ensure_ascii=False) + "\n")

    if skipped_lines:
        print(f"Skipped {skipped_lines} line(s) that were not valid JSON")


# Write the per-row summaries to a sibling file: everything in file_path before
# its first "." with "_processed.jsonl" appended.
# NOTE(review): a "." anywhere earlier in the path (e.g. in a directory name)
# would cut the path there - confirm this is acceptable for your paths.
extract_text_and_reward_jsonl(
    input_path=file_path,
    output_path=f'{file_path.partition(".")[0]}_processed.jsonl',
)




In [None]:
import pandas as pd

# Load the processed JSONL file (one JSON object per line) as a dataframe.
# The path is everything in file_path before its first "." plus "_processed.jsonl".
df = pd.read_json(
    file_path.split(".")[0]+"_processed.jsonl",
    lines=True
)

print(f"Loaded {len(df)} rows")
print(f"\nDataframe shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print("\nFirst few rows:")
# A bare `df.head()` is rendered only when it is the last expression of its
# cell; call display() so the preview announced above is always shown.
display(df.head())




from datasets import load_dataset

# Load dataset from Hugging Face
dataset = load_dataset("CohenQu/POPE-MIX-first_guide-no_guide-0.0-0.64-1024-verl")

# Convert to pandas DataFrame.
# If the loaded object is a dict of splits, use the first one listed
# (usually 'train'); otherwise convert it directly.
if isinstance(dataset, dict):
    split_name = list(dataset.keys())[0]
    df_hf = dataset[split_name].to_pandas()
    print(f"Dataset has splits: {list(dataset.keys())}")
    print(f"Using split: {split_name}")
else:
    df_hf = dataset.to_pandas()

# Use the content of the first message of each prompt as the lookup text.
# NOTE(review): presumably the first message is the user turn - confirm it
# matches what process_text extracts from the rollout file.
df_hf['text'] = df_hf['prompt'].apply(lambda x: x[0]['content'])


print(f"\nLoaded {len(df_hf)} rows from Hugging Face")
print(f"\nDataframe shape: {df_hf.shape}")
print(f"\nColumns: {df_hf.columns.tolist()}")
print("\nFirst few rows:")
# A bare `df_hf.head()` is rendered only when it is the last expression of its
# cell; call display() so the preview announced above is always shown.
display(df_hf.head())


# Map each prompt text to the first 'level' value recorded for it in df_hf
text_to_data_source = df_hf.groupby('text')['level'].first().to_dict()

# Tag every processed row with that value; a text missing from the mapping raises KeyError
df['source'] = df['text'].apply(lambda prompt: text_to_data_source[prompt])


# 1.0 when the row's rewards sum to exactly zero, 0.0 otherwise
# NOTE(review): this equals "no positive reward" only if rewards are never negative - confirm
df['all_negative'] = df['rewards'].apply(
    lambda row_rewards: 1.0 if sum(row_rewards) == 0. else 0.0
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Mean of all_negative for every (model_version, source) pair
df_plot = df.groupby(['model_version', 'source'])['all_negative'].mean().reset_index()

# Pivot to have one column per source, indexed by model_version
df_pivot = df_plot.pivot(index='model_version', columns='source', values='all_negative')

# Sort by model_version so the rolling window runs over consecutive versions
df_pivot = df_pivot.sort_index()

# Running-average window, in number of model_version rows (centered; adjust as needed)
window_size = 10
# Opacity of the raw, unsmoothed curves
raw_alpha = 0.4

fig, ax = plt.subplots(figsize=(12, 6))

# One base color per source, sampled evenly from the tab10 colormap
colors = plt.cm.tab10(np.linspace(0, 1, len(df_pivot.columns)))

x = df_pivot.index

for i, source in enumerate(df_pivot.columns):
    color = colors[i]
    # Raw curve: base color, made faint through the alpha keyword below
    light_color = tuple(color[:3])
    # Smoothed curve: darker shade of the same color at full opacity
    dark_color = tuple(c * 0.7 for c in color[:3]) + (1.0,)

    y_original = df_pivot[source]
    # min_periods=1 keeps values at the edges where the window is incomplete
    y_smoothed = y_original.rolling(window=window_size, center=True, min_periods=1).mean()

    # Original values (no label, so they do not get a legend entry)
    ax.plot(x, y_original, marker='o', color=light_color, markersize=3,
            alpha=raw_alpha, linewidth=1)

    # Smoothed values
    ax.plot(x, y_smoothed, marker='o', color=dark_color, markersize=4,
            label=f'{source}', linewidth=2)

ax.set_xlabel('Model Version')
ax.set_ylabel('Average all_negative')
ax.set_title('all_negative averaged across model_version, per source')
ax.legend(title='Source', ncol=2, fontsize=9)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()