In [90]:
import pandas as pd 
import numpy as np
import plotly.express as px
# Size and template keyword arguments unpacked into every plotly express call in this notebook.
figure_kwargs = dict(width=600, height=400, template="none")

In [91]:
# Raw annotation column name -> label shown in figure legends and axes.
column_labels = {
    "seeds": "Number of Seeds",
    "env_version_specified": "Environment fully specified",
    "hyperparameters_detailed": "Hyperparameters specified",
    "code_available": "Code available",
    "conference": "Conference",
}

# Raw venue / venue-edition identifier -> display name.
venue_labels = {
    "neurips": "NeurIPS", "neurips_2018": "NeurIPS'18", "neurips_2019": "NeurIPS'19", "neurips_2020": "NeurIPS'20",
    "neurips_2021": "NeurIPS'21", "neurips_2022": "NeurIPS'22", "neurips_2023": "NeurIPS'23", "neurips_2024": "NeurIPS'24",
    "rlj": "RLC", "rlj_2024": "RLC'24",
    "icml": "ICML", "icml_2018": "ICML'18", "icml_2019": "ICML'19", "icml_2020": "ICML'20",
    "icml_2021": "ICML'21", "icml_2022": "ICML'22", "icml_2023": "ICML'23", "icml_2024": "ICML'24",
    "iclr": "ICLR", "iclr_2018": "ICLR'18", "iclr_2019": "ICLR'19", "iclr_2020": "ICLR'20",
    "iclr_2021": "ICLR'21", "iclr_2022": "ICLR'22", "iclr_2023": "ICLR'23", "iclr_2024": "ICLR'24",
}

# Load the annotations, blank out missing values, then apply the display labels.
# The value replacement is applied to every column of the frame, not only to the venue columns.
data = (
    pd.read_csv("processed_data/automatic_paper_annotations.csv")
    .fillna("")
    .rename(columns=column_labels)
    .replace(venue_labels)
)

In [92]:
# Paper totals per year, coloured by venue.
# `y_axis` is kept as a notebook-level name because the later plotting cells unpack it too.
y_axis = dict(yaxis_title="Number of Papers")

fig = px.histogram(data, x="year", color="Conference", **figure_kwargs)
fig.update_layout(title="Total RL Papers", xaxis_title="Year", **y_axis)
fig.write_image("plots/automated_analysis_total.png", scale=3)
fig.show()

In [93]:
# Code availability, shown three ways: per conference edition, per conference, per year.
# Each entry: (x column, figure title, x-axis label, output file).
code_views = [
    ("conf_id", "RL Papers With Code per Conference & Year", "Conference Edition",
     "plots/automated_analysis_code_by_conf_id.png"),
    ("Conference", "RL Papers With Code per Conference", "Conference",
     "plots/automated_analysis_code_by_conf.png"),
    ("year", "RL Papers With Code per Year", "Year",
     "plots/automated_analysis_code_by_year.png"),
]
for x_col, plot_title, x_label, out_path in code_views:
    fig = px.histogram(data, x=x_col, color="Code available", **figure_kwargs)
    fig.update_layout(title=plot_title, xaxis_title=x_label, **y_axis)
    fig.write_image(out_path, scale=3)
    fig.show()

In [94]:
# Hyperparameter specification, shown per conference edition, per conference and per year.
# Each entry: (x column, figure title, x-axis label, output file).
hp_views = [
    ("conf_id", "Hyperparameter Specification in RL Papers per Conference & Year", "Conference Edition",
     "plots/automated_analysis_hps_by_conf_id.png"),
    ("Conference", "Hyperparameter Specification in RL Papers per Conference", "Conference",
     "plots/automated_analysis_hps_by_conf.png"),
    ("year", "Hyperparameter Specification in RL Papers per Year", "Year",
     "plots/automated_analysis_hps_by_year.png"),
]
for x_col, plot_title, x_label, out_path in hp_views:
    fig = px.histogram(data, x=x_col, color="Hyperparameters specified", **figure_kwargs)
    fig.update_layout(title=plot_title, xaxis_title=x_label, **y_axis)
    fig.write_image(out_path, scale=3)
    fig.show()

In [95]:
# Environment specification, shown per conference edition, per conference and per year.
# Each entry: (x column, figure title, x-axis label, output file).
env_views = [
    ("conf_id", "Environment Specification in RL Papers per Conference & Year", "Conference Edition",
     "plots/automated_analysis_env_by_conf_id.png"),
    ("Conference", "Environment Specification in RL Papers per Conference", "Conference",
     "plots/automated_analysis_env_by_conf.png"),
    ("year", "Environment Specification in RL Papers per Year", "Year",
     "plots/automated_analysis_env_by_year.png"),
]
for x_col, plot_title, x_label, out_path in env_views:
    fig = px.histogram(data, x=x_col, color="Environment fully specified", **figure_kwargs)
    fig.update_layout(title=plot_title, xaxis_title=x_label, **y_axis)
    fig.write_image(out_path, scale=3)
    fig.show()

In [96]:
# Seed counts, shown per conference edition, per conference and per year.
# The bucket order is fixed so the legend reads from fewest to most seeds.
seed_buckets = ["0", "1-5", "6-10", "over 10"]

# Each entry: (x column, figure title, x-axis label, output file).
seed_views = [
    ("conf_id", "Number of Seeds in RL Papers per Conference & Year", "Conference Edition",
     "plots/automated_analysis_seeds_by_conf_id.png"),
    ("Conference", "Number of Seeds in RL Papers per Conference", "Conference",
     "plots/automated_analysis_seeds_by_conf.png"),
    ("year", "Number of Seeds in RL Papers per Year", "Year",
     "plots/automated_analysis_seeds_by_year.png"),
]
for x_col, plot_title, x_label, out_path in seed_views:
    fig = px.histogram(
        data,
        x=x_col,
        color="Number of Seeds",
        # A fresh dict/list per call, exactly as the three literal calls had.
        category_orders={"Number of Seeds": list(seed_buckets)},
        **figure_kwargs,
    )
    fig.update_layout(title=plot_title, xaxis_title=x_label, **y_axis)
    fig.write_image(out_path, scale=3)
    fig.show()

In [97]:
# Algorithms mentioned at least 3 times
# Placeholder values in the algorithm columns that do not name an algorithm.
IGNORED_ALGORITHM_LABELS = frozenset({"", " ", "-", "Not specified"})


def count_algorithm_mentions(papers, min_mentions=3):
    """Count algorithm mentions per (year, conference) and drop rarely mentioned algorithms.

    Every column of ``papers`` whose name contains ``"algorithm"`` is treated
    as an algorithm column. Each distinct string value found in those columns
    (minus ``IGNORED_ALGORITHM_LABELS``) is a candidate algorithm name. A cell
    counts as one mention when it contains the name as a *literal* substring.

    Parameters
    ----------
    papers : pd.DataFrame
        Must provide ``"year"`` and ``"Conference"`` columns in addition to
        the algorithm columns.
    min_mentions : int, default 3
        Algorithms whose mentions, summed over all (year, conference) pairs,
        are below this threshold are removed from the result.

    Returns
    -------
    pd.DataFrame
        Columns ``"year"``, ``"Conference"``, ``"algorithm"``, ``"counts"``.
        One row per kept algorithm and per (year, conference) combination,
        including combinations with zero mentions.
    """
    result_columns = ["year", "Conference", "algorithm", "counts"]
    algorithm_columns = [col for col in papers.columns if "algorithm" in col]
    if not algorithm_columns:
        # Nothing to count; np.concatenate on an empty list used to raise here.
        return pd.DataFrame(columns=result_columns)

    # Sorted so the row order is reproducible across kernels (set order is not).
    # Non-string values are skipped because they cannot be used as a search term.
    algorithm_names = sorted(
        {
            value
            for col in algorithm_columns
            for value in papers[col].unique()
            if isinstance(value, str) and value not in IGNORED_ALGORITHM_LABELS
        }
    )

    # Slice the frame once per (year, conference) instead of once per
    # algorithm x column; the slices do not depend on the algorithm name.
    groups = []
    for year in papers["year"].unique():
        for conference in papers["Conference"].unique():
            in_group = (papers["year"] == year) & (papers["Conference"] == conference)
            groups.append((year, conference, papers.loc[in_group, algorithm_columns]))

    records = []
    for name in algorithm_names:
        for year, conference, group in groups:
            # regex=False: names such as "TD3+BC" or "TD(0)" must be matched
            # literally; as a regular expression they miscount or fail to compile.
            # NOTE: this is still substring matching, so a short name is also
            # counted inside longer names that contain it.
            mentions = sum(
                int(group[col].str.contains(name, regex=False, na=False).sum())
                for col in algorithm_columns
            )
            records.append(
                {"year": year, "Conference": conference, "algorithm": name, "counts": mentions}
            )

    counts_frame = pd.DataFrame(records, columns=result_columns)
    if counts_frame.empty:
        return counts_frame

    # Keep only algorithms whose total over all groups reaches the threshold.
    totals = counts_frame.groupby("algorithm")["counts"].transform("sum")
    return counts_frame[totals >= min_mentions]


count_df = count_algorithm_mentions(data)

In [98]:
# Algorithm usage: overall, then split by year and by conference.
# Each entry: (colour column or None for no split, figure title, output file).
algorithm_views = [
    (None, "Algorithm Usage in Papers", "plots/automated_analysis_algorithms.png"),
    ("year", "Algorithm Usage in Papers by Year", "plots/automated_analysis_algorithms_by_year.png"),
    ("Conference", "Algorithm Usage in Papers by Conference", "plots/automated_analysis_algorithms_by_conf.png"),
]
for color_col, plot_title, out_path in algorithm_views:
    fig = px.histogram(count_df, y="counts", x="algorithm", color=color_col, **figure_kwargs)
    # Order the bars from most to least used.
    fig.update_xaxes(categoryorder="total descending")
    fig.update_layout(
        xaxis_title="Algorithm",
        yaxis_title="Frequency of Algorithm Usage",
        title=plot_title,
    )
    fig.write_image(out_path, scale=3)
    fig.show()