In [118]:
import pandas as pd 
import numpy as np
import plotly.express as px

figure_kwargs = {"width": 600, "height": 400, "template": "none"}

In [119]:
# Remove older neurips annotations & make labels prettier
data = pd.read_csv("processed_data/manual_paper_annotations.csv")
data = data[data["year"] == 24]
data.rename(columns={"seeds": "Number of Seeds", "environment_spec": "Environment fully specified", "hyperparams_spec": "Hyperparameters specified", "code_available": "Code available"}, inplace=True)
data.replace({"neurips_24": "NeurIPS'24", "rlc_24": "RLC'24", "icml_24": "ICML'24", "iclr_24": "ICLR'24"}, inplace=True)
data.fillna("", inplace=True)
empirical_data = data[data["empirical"] == "Yes"]


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [123]:
# Empirical vs non-empirical papers
axes = {"xaxis_title": "Conference", "yaxis_title": "Number of Papers (Orals & Awards)"}
fig = px.histogram(data, x='conf_id', color='empirical', **figure_kwargs).update_layout(title="Empirical vs Non-Empirical Orals & Awards", **axes)
fig.write_image("plots/manual_analysis_empirical.svg")
fig.show()

# Code
fig = px.histogram(empirical_data, x='conf_id', color='Code available', **figure_kwargs).update_layout(title="Papers with Code Available", **axes)
fig.write_image("plots/manual_analysis_code.svg")
fig.show()

# HPs
fig = px.histogram(empirical_data, x='conf_id', color='Hyperparameters specified', **figure_kwargs).update_layout(title="Hyperparameter Specification", **axes)
fig.write_image("plots/manual_analysis_hps.svg")
fig.show()

# Env
fig = px.histogram(empirical_data, x='conf_id', color='Environment fully specified', **figure_kwargs).update_layout(title="Full Environment Specification", **axes)
fig.write_image("plots/manual_analysis_env.svg")
fig.show()

# Seeds
fig = px.histogram(empirical_data, x='conf_id', color='Number of Seeds', category_orders={"Number of Seeds": ["0", "1-5", "6-10", "over 10"]}, **figure_kwargs).update_layout(title="Number of Seeds", **axes)
fig.write_image("plots/manual_analysis_seeds.svg")
fig.show()

In [121]:
# Algorithms mentioned at least 3 times
counts = []
for c in data.columns:
    if "algorithm" in c:
        counts.append(data[c].unique())
all_algorithms = list(set(np.concatenate(counts)))
years = []
conferences = []
algos = []
counts = []
for a in all_algorithms:
    if a != "" and a != " " and a != "-":
        for year in data['year'].unique():
            for conference in data['conference'].unique():
                c = 0
                all = []
                for col in data.columns:
                    if "algorithm" in col:
                        c += data[(data['year'] == year) & (data['conference'] == conference)][col].str.contains(a).sum()
                        all.append(data[(data['year'] == year) & (data['conference'] == conference)][col].unique())
                years.append(year)
                conferences.append(conference)
                algos.append(a)
                counts.append(c)
count_df = pd.DataFrame({'year': years, 'conference': conferences, 'algorithm': algos, 'counts': counts})
for a in algos:
    if count_df[count_df['algorithm'] == a]['counts'].sum() < 3:
        count_df = count_df[count_df['algorithm'] != a]

In [None]:
fig = px.histogram(count_df, y="counts", x="algorithm", **figure_kwargs).update_xaxes(categoryorder="total descending").update_layout(xaxis_title="Algorithm", yaxis_title="Frequency of Algorithm Usage", title="Algorithm Usage in Papers")
fig.write_image("plots/manual_analysis_algorithms.svg")
fig.show()
fig = px.histogram(count_df, y="counts", x="algorithm", color="conference", **figure_kwargs).update_xaxes(categoryorder="total descending").update_layout(xaxis_title="Algorithm", yaxis_title="Frequency of Algorithm Usage", title="Algorithm Usage in Papers by Conference")
fig.write_image("plots/manual_analysis_algorithms_per_conf.svg")
fig.show()