In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# apply seaborn's "whitegrid" theme for the figures drawn later in the notebook,
# then print a confirmation so it is obvious the import cell ran to completion
sns.set_theme(style="whitegrid")
print("Libraries loaded successfully.")

Libraries loaded successfully.


In [2]:
# load results.csv into pandas dataframe
#
# Defines the notebook-global `df` used by the analysis cells below.

# path is relative to the directory the notebook kernel was started from
RESULTS_CSV = "llm_bug_analysis/results/results.csv"

try:
    # only the read itself can raise FileNotFoundError, so keep the try block narrow
    df = pd.read_csv(RESULTS_CSV)
except FileNotFoundError:
    print("ERROR: results.csv not found. Make sure you have run the analysis pipeline first.")
    # re-raise after the hint: swallowing the error would leave `df` undefined
    # (NameError in the next cell on a fresh kernel) or, worse, silently keep a
    # stale `df` from an earlier run of this cell
    raise

# display first 5 rows to see if it loaded correctly
print("Data loaded successfully. First 5 rows:")
display(df.head())

# print summary of data types, check for issues
print("\nData columns and types:")
df.info()

Data loaded successfully. First 5 rows:


Unnamed: 0,timestamp,repo_name,bug_commit_sha,file_path,commit_message,issue_title,issue_body,llm_model,complexity_before_cc,complexity_before_cognitive,...,complexity_after_llm_avg_params,complexity_after_llm_total_tokens,human_tests_passed,human_lines_added,human_lines_deleted,human_total_diff,complexity_after_human_cc,complexity_after_human_cognitive,complexity_after_human_avg_params,complexity_after_human_total_tokens
0,2025-10-11T15:02:54.981034,mahmoud/boltons,4815fc8dd1768da5f2d903846d2ab994aa57b0cf,,Test and fix for #348 (#349),LRU .values() and dict return old entries,"Hi,\r\n\r\nFirst of all thanks for the excelle...",manual_llm,198,135,...,SKIPPED,SKIPPED,True,44,9,53,199,135,1.68,5014



Data columns and types:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 29 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   timestamp                            1 non-null      object 
 1   repo_name                            1 non-null      object 
 2   bug_commit_sha                       1 non-null      object 
 3   file_path                            0 non-null      float64
 4   commit_message                       1 non-null      object 
 5   issue_title                          1 non-null      object 
 6   issue_body                           1 non-null      object 
 7   llm_model                            1 non-null      object 
 8   complexity_before_cc                 1 non-null      int64  
 9   complexity_before_cognitive          1 non-null      int64  
 10  complexity_before_avg_params         1 non-null      float64
 11  complexity_

In [3]:
# derive a boolean `llm_success` flag for every row where an LLM fix was attempted
#
# Defines the notebook-globals `df_ai` (attempted rows only) and, when there is
# data, `overall_success_rate` (fraction of attempted rows whose tests passed).

# rows where the LLM step did not run carry the sentinel string 'SKIPPED'
df_ai = df[df['llm_tests_passed'] != 'SKIPPED'].copy()
if df_ai.empty:
    print("No LLM results found in the data. Skipping AI success rate calculation.")
else:
    # this part only runs if there is data
    llm_results = df_ai['llm_tests_passed']
    if pd.api.types.is_numeric_dtype(llm_results):
        # genuine bool / 0-1 column (no sentinel was present in the file):
        # a plain cast is correct
        df_ai['llm_success'] = llm_results.astype(bool)
    else:
        # the column shares storage with the 'SKIPPED' sentinel, so the values are
        # text such as 'True' / 'False'. astype(bool) would map every non-empty
        # string -- including 'False' -- to True and report a 100% success rate.
        # Parse the text explicitly; anything that is not a literal true value
        # (including a missing entry) counts as a failed attempt.
        llm_results_text = llm_results.astype(str).str.strip().str.lower()
        df_ai['llm_success'] = llm_results_text.isin(['true', '1'])

    overall_success_rate = df_ai['llm_success'].mean()
    print(f"Overall LLM Success Rate (where attempted): {overall_success_rate:.2%}")

No LLM results found in the data. Skipping AI success rate calculation.


In [4]:
# per-repository LLM pass rate: printed summary followed by a bar chart

if not df_ai.empty:
    # mean of the boolean success flag per repo, best-performing repo first
    success_by_repo = (
        df_ai
        .groupby('repo_name')['llm_success']
        .mean()
        .sort_values(ascending=False)
    )

    print("LLM Success Rate by Repository:")
    print(success_by_repo.apply('{:.2%}'.format))

    # draw the same numbers as a bar chart on an explicitly created figure/axes pair
    fig, ax = plt.subplots(figsize=(10, 6))
    success_by_repo.plot(kind='bar', ax=ax)
    ax.set_title('LLM Test Pass Rate by Repository')
    ax.set_ylabel('Success Rate')
    ax.set_xlabel('Repository')
    # slant the repository names so long labels do not overlap
    plt.xticks(rotation=45, ha='right')
    # rates are fractions in the data; show them as whole percentages on the axis
    ax.yaxis.set_major_formatter('{:.0%}'.format)
    fig.tight_layout()
    plt.show()
else:
    # nothing to summarise when no LLM fix was attempted
    print("No LLM results found in the data. Skipping the repository summary plot.")

No LLM results found in the data. Skipping the repository summary plot.
