In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import bigrams
import geopandas as gpd

pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns

In [2]:
df_complete = pd.read_excel('results/final/complete.xlsx', sheet_name='AI_ICSE2025_papers')

df_relevant = df_complete[df_complete['relevant'] == True]

print("Columns")
df_complete.columns

Columns


Index(['reviewer', 'relevant', 'year', 'title', 'authors', 'url', 'abstract',
       'artifact_available', 'artifact_reusable', 'artifact_functional', 'ai',
       'Task being solved  (CODE GENERATION;APR;TESTING;COMMENT GENERATION/...)',
       'AA Task Consolidated',
       'Do the papers evaluate with approaches other than LLM? (TRUE/FALSE)',
       'Is the budget fair (on hold)',
       'Do they evaluate with open source/commercial/open weight models? (COMMERCIAL/OPEN SOURCE/OPEN WEIGHT)',
       'DW Open/Closed Consolidated ',
       'How many models overall, benchmarks and approach? (integer)',
       'DW Num. Models Conslidated', 'Name of models',
       'DW Scale of models (LARGE >50B/MEDIUM>1B/SMALL<1B)',
       'Size of models from the paper (free text from the paper)',
       'DW Model Sizes Reported (none, partial, full, N/A for commercial)',
       'Do they report on the model versions? (TRUE/FALSE)',
       'Configuration of models (TRUE/FALSE, or parameters)',
       'AA

In [3]:
df_complete['relevant'].value_counts()

relevant
True       164
False       50
SPECIAL      3
Name: count, dtype: int64

In [4]:
# Number of relevant papers per year
df_relevant['year'].value_counts()

year
2025.0    89
2024.0    53
2023.0    22
Name: count, dtype: int64

In [5]:
print(df_relevant["AA Task Consolidated"].unique())

['vulnerability detection' 'test generation' 'test repair'
 'code refinement' 'code generation' 'comment repair' 'program repair'
 'SO post editing' 'code reasoning; code translation' 'code reasoning'
 'code translation' 'bug detection' 'security patch detection'
 'model completion' 'commit message generation'
 'traceability link recovery' 'log parsing' 'AI generated code detection'
 'code generation; code summarization' 'program analysis'
 'regression testing' 'code search' 'fuzzing'
 'inconsistency prediction in decentralised apps' 'formal verification'
 'bug report comprehension' 'UI design repair' 'type detection'
 'smart contract auditing' 'configuration validation'
 'detection of code design issues' 'code understanding; code generation'
 'code completion' 'clone detection; vulnerability detection'
 'code adaptation' 'fault localisation; bug detection'
 'code summarization' 'code optimisation' 'security injection'
 'machine-generated code detection' 'root cause analysis'
 'code su

In [6]:
# Split the 'AA Task Consolidated' column into lists
df_relevant['AA Task List'] = df_relevant['AA Task Consolidated'].apply(
    lambda x: [task.strip() for task in str(x).split(';')] if pd.notna(x) else []
)

df_relevant[['AA Task Consolidated', 'AA Task List']].head(50)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_relevant['AA Task List'] = df_relevant['AA Task Consolidated'].apply(


Unnamed: 0,AA Task Consolidated,AA Task List
0,vulnerability detection,[vulnerability detection]
1,test generation,[test generation]
2,test repair,[test repair]
4,code refinement,[code refinement]
6,test generation,[test generation]
7,code generation,[code generation]
9,test generation,[test generation]
10,code generation,[code generation]
11,test generation,[test generation]
13,code generation,[code generation]


In [7]:
from collections import Counter

all_tasks = []
for task_list in df_relevant['AA Task List']:
    all_tasks.extend(task_list)

task_counts = Counter(all_tasks)

task_frequency_df = pd.DataFrame(task_counts.items(), columns=['Task', 'Paper_Count'])
task_frequency_df = task_frequency_df.sort_values('Paper_Count', ascending=False, ignore_index=True)

print(f"Total unique tasks: {len(task_frequency_df)}")
task_frequency_df

Total unique tasks: 68


Unnamed: 0,Task,Paper_Count
0,code generation,25
1,test generation,24
2,program repair,22
3,vulnerability detection,13
4,bug detection,8
5,fuzzing,5
6,log parsing,5
7,type detection,5
8,code completion,5
9,code translation,5


In [None]:
# Looking at how many papers cover multiple tasks
df_relevant['num_tasks'] = df_relevant['AA Task List'].apply(len)

print("Distribution of number of tasks per paper:")
print(df_relevant['num_tasks'].value_counts().sort_index())
print(f"\nPapers covering multiple tasks: {(df_relevant['num_tasks'] > 1).sum()}")
print(f"Percentage covering multiple tasks: {(df_relevant['num_tasks'] > 1).mean()*100:.1f}%")

print(df_relevant[df_relevant['num_tasks'] > 5][['title', 'AA Task List']])

Distribution of number of tasks per paper:
num_tasks
1     149
2       9
3       5
13      1
Name: count, dtype: int64

Papers covering multiple tasks: 15
Percentage covering multiple tasks: 9.1%
                                                 title  \
205  An Empirical Comparison of Pre-Trained Models ...   

                                          AA Task List  
205  [bug detection, clone detection, type detectio...  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_relevant['num_tasks'] = df_relevant['AA Task List'].apply(len)


# Artifact Badges vs Our Counts

## Badges from ACM Data Collection

In [9]:
# Overall
print(df_relevant['artifact_available'].value_counts())
print()
print(df_relevant['artifact_reusable'].value_counts())
print()
print(df_relevant['artifact_functional'].value_counts())

artifact_available
False    132
True      32
Name: count, dtype: int64

artifact_reusable
False    143
True      21
Name: count, dtype: int64

artifact_functional
False    150
True      14
Name: count, dtype: int64


In [10]:
# Per year
print("Artifact Available (ACM Badges)")
print("2023")
print(df_relevant[df_relevant['year'] == 2023]['artifact_available'].value_counts())
print("\n2024")
print(df_relevant[df_relevant['year'] == 2024]['artifact_available'].value_counts())
print("\n2025")
print(df_relevant[df_relevant['year'] == 2025]['artifact_available'].value_counts())


print("\n\nArtifact Reusable (ACM Badges)")
print("2023")
print(df_relevant[df_relevant['year'] == 2023]['artifact_reusable'].value_counts())
print("\n2024")
print(df_relevant[df_relevant['year'] == 2024]['artifact_reusable'].value_counts())
print("\n2025")
print(df_relevant[df_relevant['year'] == 2025]['artifact_reusable'].value_counts())


print("\n\nArtifact Functional (ACM Badges)")
print("2023")
print(df_relevant[df_relevant['year'] == 2023]['artifact_functional'].value_counts())
print("\n2024")
print(df_relevant[df_relevant['year'] == 2024]['artifact_functional'].value_counts())
print("\n2025")
print(df_relevant[df_relevant['year'] == 2025]['artifact_functional'].value_counts())

Artifact Available (ACM Badges)
2023
artifact_available
False    17
True      5
Name: count, dtype: int64

2024
artifact_available
False    45
True      8
Name: count, dtype: int64

2025
artifact_available
False    70
True     19
Name: count, dtype: int64


Artifact Reusable (ACM Badges)
2023
artifact_reusable
False    20
True      2
Name: count, dtype: int64

2024
artifact_reusable
False    45
True      8
Name: count, dtype: int64

2025
artifact_reusable
False    78
True     11
Name: count, dtype: int64


Artifact Functional (ACM Badges)
2023
artifact_functional
False    21
True      1
Name: count, dtype: int64

2024
artifact_functional
False    53
Name: count, dtype: int64

2025
artifact_functional
False    76
True     13
Name: count, dtype: int64


## Artifact Availability from Manual Checking

In [11]:
# Overall
print(df_relevant['MK Artefact available consolidated'].value_counts())

MK Artefact available consolidated
True     129
False     24
DEAD      11
Name: count, dtype: int64


In [12]:
# Per year
print("2023 Artifacts (Manual Checking)")
print(df_relevant[df_relevant['year'] == 2023]['MK Artefact available consolidated'].value_counts())
print("\n2024 Artifacts (Manual Checking)")
print(df_relevant[df_relevant['year'] == 2024]['MK Artefact available consolidated'].value_counts())
print("\n2025 Artifacts (Manual Checking)")
print(df_relevant[df_relevant['year'] == 2025]['MK Artefact available consolidated'].value_counts())

2023 Artifacts (Manual Checking)
MK Artefact available consolidated
True     15
False     4
DEAD      3
Name: count, dtype: int64

2024 Artifacts (Manual Checking)
MK Artefact available consolidated
True     40
False     9
DEAD      4
Name: count, dtype: int64

2025 Artifacts (Manual Checking)
MK Artefact available consolidated
True     74
False    11
DEAD      4
Name: count, dtype: int64
