# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
import glob
from matplotlib.colors import LinearSegmentedColormap

# Human-readable label for the evaluated model.
MODEL_NAME = "LLaMA 2 7B continue-trained on 5B tokens 80K length Per-source length upsampled data"
# Context-length threshold in tokens; presumably the 80K (80 * 1024) training
# length mentioned in MODEL_NAME -- TODO confirm.
PRETRAINED_LEN = 81920

# Root directory that holds one sub-directory of JSON results per run.
RES_PREFIX = "needle_results/"

# Candidate result directories. Uncomment the run to visualize; only the
# FIRST uncommented entry is used to build FOLDER_PATH.
res_lis = [
    # "None-32000"

    ########## k2v2 g128
    # "k2-v2-g128-w128-rod-clip0.92-pre_rope-sink5-fp8-32000"
    # "k2-v2-g128-w128-clip1.0-KIVI-32000"

    ########## k2v2 g64
    # "k2-v2-g64-w128-rod-clip0.92-pre_rope-sink5-fp8-32000"
    # "k2-v2-g64-w128-KIVI-32000"

    ########## k2v1.5 g128
    # "k2-v1.5-g128-w128-rod-clip0.92-pre_rope-sink5-fp8-32000"
    # "k2-v1.5-g128-w128-KIVI-32000"
]

# Fail with an actionable message instead of a bare "list index out of range"
# when every entry above is still commented out. The exception type is kept
# as IndexError, which is what `res_lis[0]` raised before.
if not res_lis:
    raise IndexError(
        "res_lis is empty: uncomment one result directory name in res_lis "
        "(only the first entry is used)"
    )

# Folder scanned for `*.json` result files; the trailing "/" is required
# because the glob pattern is appended directly to this string.
FOLDER_PATH = f"{RES_PREFIX}llama2-7b-80k/{res_lis[0]}/"

def _token_recall(response, expected_tokens):
    """Return the share of ``expected_tokens`` that appear in ``response``.

    ``response`` is split on whitespace and compared to ``expected_tokens`` as
    a set. The denominator is ``len(expected_tokens)``, duplicates included,
    exactly as in the original scoring expression.
    """
    matched = set(response.split()).intersection(expected_tokens)
    return len(matched) / len(expected_tokens)


def _load_results(json_files):
    """Read each result file and return one row dict (depth, length, score) per file."""
    # The needle answer is identical for every file, so tokenize it once.
    # NOTE(review): "a" occurs twice in this sentence (12 tokens, 11 unique),
    # so a perfect response scores 11/12 rather than 1.0. Left unchanged to
    # keep scores comparable with earlier runs -- confirm whether the
    # denominator should be the number of unique tokens.
    expected_tokens = "eat a sandwich and sit in Dolores Park on a sunny day.".lower().split()

    rows = []
    for path in json_files:
        with open(path, "r", encoding="utf-8") as f:
            record = json.load(f)
        # A missing or null response is scored as 0 instead of failing on
        # `None.lower()`.
        response = (record.get("model_response") or "").lower()
        rows.append({
            "Document Depth": record.get("depth_percent"),
            "Context Length": record.get("context_length"),
            "Score": _token_recall(response, expected_tokens),
        })
    return rows


def main():
    """Plot a depth-vs-context-length heatmap of needle retrieval scores.

    Reads every ``*.json`` file directly under ``FOLDER_PATH``, scores each
    ``model_response`` by token overlap with the fixed expected answer, prints
    the first rows and the mean score over all files, and draws a seaborn
    heatmap of the mean score per (document depth, context length) cell. The
    figure title shows the SUM of those per-cell means, which is a different
    quantity from the printed mean.

    Raises:
        FileNotFoundError: if no JSON result file is found in ``FOLDER_PATH``.
    """
    folder_path = FOLDER_PATH
    # With a trailing "/", the second-to-last path component is the run
    # directory name; otherwise fall back to the configured model label.
    model_name = folder_path.split("/")[-2] if "/" in folder_path else MODEL_NAME
    print(f"model_name = {model_name}")

    json_files = glob.glob(f"{folder_path}*.json")
    if not json_files:
        # Previously this surfaced later as an obscure KeyError on the empty
        # DataFrame.
        raise FileNotFoundError(f"No JSON result files found in {folder_path!r}")

    df = pd.DataFrame(_load_results(json_files))
    print(df.head())

    mean_score = df["Score"].mean()
    print(f"Overall score {mean_score:.3f}")

    # First aggregate duplicate (depth, length) pairs by their mean, then
    # reshape into a depth x length grid for the heatmap.
    cell_means = pd.pivot_table(
        df,
        values="Score",
        index=["Document Depth", "Context Length"],
        aggfunc="mean",
    ).reset_index()
    pivot_table = cell_means.pivot(index="Document Depth", columns="Context Length", values="Score")
    # Sum over all heatmap cells; this is the value shown in the title.
    total_score = pivot_table.sum().sum()

    # Custom red -> yellow -> green colormap (low -> high score).
    cmap = LinearSegmentedColormap.from_list("custom_cmap", ["#F0496E", "#EBB839", "#0CD79F"])

    plt.figure(figsize=(6.5, 4))  # Can adjust these dimensions as needed
    sns.heatmap(
        pivot_table,
        vmin=0, vmax=1,
        cmap=cmap,
        cbar_kws={'label': 'Score'},
        linewidths=0.5,  # Thickness of the grid lines
        linecolor='grey',  # Color of the grid lines
        linestyle='--'
    )

    plt.title(f"OverAll score: {total_score}")
    plt.xlabel('Token Limit', fontsize=18)
    plt.ylabel('Depth Percent', fontsize=18)
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()

# Run the visualization when executed as a script or notebook cell, but not
# as a side effect of importing this module.
if __name__ == "__main__":
    main()