In [3]:
import json
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Export the non-quantized accuracy of every model at each shot count as a flat
# (Model, Quantized, Shot, Accuracy) table.

# Directory holding both the raw statement results and the exported table.
RESULTS_DIR = "/home/XXX/CodeSemantic/CodeSemantic/statement_Accuracy_Results"
input_path = f"{RESULTS_DIR}/statement_results.jsonl"
output_path = f"{RESULTS_DIR}/shotaccuracy.xlsx"

# Paid models to exclude (if needed later); defined here but not applied in this cell.
PAID_MODELS = {
    "anthropic.claude-3-5-sonnet-20241022-v2:0",
    "gemini-1.5-flash-002",
}

# Shot counts that get a row in the exported table.
shots = [0, 1, 2, 3]

# Load data: one JSON object per line. Blank lines (e.g. a trailing newline at
# the end of the file) are skipped instead of crashing json.loads.
data = []
with open(input_path, "r", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            data.append(json.loads(line))

# Organize results as model -> quantization mode -> shot -> accuracy.
# Any "quantization" value other than "yes" is treated as non-quantized, and a
# later record for the same (model, mode, shot) overwrites an earlier one.
model_results = defaultdict(lambda: {
    "quantized": defaultdict(float),
    "non_quantized": defaultdict(float)
})
for entry in data:
    quant = "quantized" if entry["quantization"] == "yes" else "non_quantized"
    model_results[entry["Model"]][quant][entry["shot"]] = entry["accuracy"]

# All models present in data, in first-seen order.
models = list(model_results.keys())

# Only non-quantized results are exported. `.get` is used on purpose: indexing
# the inner defaultdict(float) would report a missing (model, shot) result as an
# accuracy of 0.0 and insert that phantom entry into model_results; a missing
# result is recorded as NaN instead.
rows = [
    {
        "Model": model,
        "Quantized": "No",
        "Shot": shot,
        "Accuracy": model_results[model]["non_quantized"].get(shot, np.nan),
    }
    for model in models
    for shot in shots
]

# Explicit columns keep the header row even when the input file has no records.
df = pd.DataFrame(rows, columns=["Model", "Quantized", "Shot", "Accuracy"])

# Save flat table to Excel
df.to_excel(output_path, index=False)

print(f"Excel file saved to {output_path}")


Excel file saved to /home/XXX/CodeSemantic/CodeSemantic/statement_Accuracy_Results/shotaccuracy.xlsx


In [4]:
import pandas as pd

# Re-read the flat (Model, Quantized, Shot, Accuracy) table from disk.
df = pd.read_excel("/home/XXX/CodeSemantic/CodeSemantic/statement_Accuracy_Results/shotaccuracy.xlsx")

# Reshape to wide form: one row per (Model, Quantized) pair, one column per
# shot value, cells filled from the Accuracy column.
shot_table = df.pivot_table(
    values="Accuracy",
    index=["Model", "Quantized"],
    columns="Shot",
)

# Give every shot column a readable "Shot_<n>" label, then move the
# (Model, Quantized) index levels back into ordinary columns.
shot_labels = [f"Shot_{shot}" for shot in shot_table.columns]
pivot_df = shot_table.set_axis(shot_labels, axis="columns").reset_index()

# Write the wide table next to the flat one.
output_path = "/home/XXX/CodeSemantic/CodeSemantic/statement_Accuracy_Results/shotaccuracy_pivoted.xlsx"
pivot_df.to_excel(output_path, index=False)

print(f"Pivoted Excel file saved to {output_path}")


Pivoted Excel file saved to /home/XXX/CodeSemantic/CodeSemantic/statement_Accuracy_Results/shotaccuracy_pivoted.xlsx


In [6]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# Define reasoning models (used only to set the IsReasoning flag below)
REASONING_MODELS = {
    "DeepSeek-R1-Distill-Qwen-7B",
    "DeepSeek-R1-Distill-Llama-8B",
    "DeepSeek-R1-Distill-Qwen-14B",
    "granite-3.2-8b-instruct",
    "granite-3.2-8b-instruct-preview",
}

# Define paid models to exclude
# NOTE(review): this set is defined but never applied in this cell, so paid
# models are NOT filtered out here — confirm whether they should be.
PAID_MODELS = {
    "anthropic.claude-3-5-sonnet-20241022-v2:0",
    "gemini-1.5-flash-002",
    "gpt-4o-mini"
}

# Load data: one JSON object per line. Blank lines (e.g. a trailing newline at
# the end of the file) are skipped instead of crashing json.loads.
data = []
with open("/home/XXX/CodeSemantic/CodeSemantic/loop_Accuracy_Results/loop_python_results.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            data.append(json.loads(line))

# Flatten each record into one table row. No de-duplication is performed.
# "incontext" is optional in the input and defaults to "different"; every other
# field is required and raises KeyError when absent.
loop_data = []
for record in data:
    loop_data.append({
        "Model": record["Model"],
        "Accuracy": record["accuracy"],
        "Shots": record["shot"],
        "LoopSetting": record["settings"],
        "quantization": record["quantization"],
        "CoT": record["CoT"],
        "incontext": record.get("incontext", "different"),
        "IsReasoning": record["Model"] in REASONING_MODELS
    })

# Explicit columns keep the filter below valid even when the file has no records
# (an empty DataFrame built from [] would otherwise have no columns at all).
loop_df = pd.DataFrame(
    loop_data,
    columns=[
        "Model", "Accuracy", "Shots", "LoopSetting",
        "quantization", "CoT", "incontext", "IsReasoning",
    ],
)
print("Total records:", len(loop_df))

# Keep only rows with: quantization = no, shot = 3, incontext = different,
# and loop setting = after.
keep = (
    (loop_df['quantization'] == 'no')
    & (loop_df['Shots'] == 3)
    & (loop_df['incontext'] == 'different')
    & (loop_df['LoopSetting'] == "after")
)
df = loop_df[keep]
print("Filtered records:", len(df))
print(df)

# Save the filtered DataFrame to an Excel file in the current directory.
# NOTE(review): the frame is filtered, not pivoted, yet the file name and the
# message below both say "pivoted" — looks copied from the shot-accuracy cell;
# confirm the intended name before relying on this file.
output_path = "./shotaccuracy_pivoted.xlsx"
df.to_excel(output_path, index=False)

print(f"Pivoted Excel file saved to {output_path}")



Total records: 170
Filtered records: 39
                                         Model  Accuracy  Shots LoopSetting  \
2                        Llama-3.1-8B-Instruct  0.178571      3       after   
8                      Qwen2.5-14B-Instruct-1M  0.285714      3       after   
14                   Qwen2.5-Coder-7B-Instruct  0.178571      3       after   
19             DeepSeek-Coder-V2-Lite-Instruct  0.214286      3       after   
25                         Phi-4-mini-instruct  0.214286      3       after   
31                       Phi-3.5-mini-instruct  0.142857      3       after   
37                     granite-3.2-8b-instruct  0.107143      3       after   
43                 DeepSeek-R1-Distill-Qwen-7B  0.142857      3       after   
48                DeepSeek-R1-Distill-Llama-8B  0.214286      3       after   
52                DeepSeek-R1-Distill-Qwen-14B  0.321429      3       after   
57             granite-3.2-8b-instruct-preview  0.178571      3       after   
110         