# ICLR

In [1]:
import pandas as pd


def format_ICLR_data_to_df(raw_path, source='ICLR', year=2022, present='oral'):
    """Parse a raw ICLR paper listing into a DataFrame.

    The file is read as a sequence of records: the first line of a record is
    taken as the title, the second line as the authors, and every following
    line is skipped up to and including the first one that contains
    'Show details', which ends the record.

    Args:
        raw_path: Path of the text file to read.
        source: Value written to the 'Source' column of every row.
        year: Value written to the 'Year' column of every row.
        present: Value written to the 'Class' column of every row.

    Returns:
        A DataFrame with the columns 'Title', 'Year', 'Source', 'Authors'
        and 'Class', one row per record found in the file.
    """
    titles, authors = [], []

    # Position inside the current record:
    # 1 = title line, 2 = authors line, >2 = skip until the end marker.
    record_counter = 1
    # The context manager guarantees the handle is closed, even on error.
    with open(raw_path, 'r') as raw_file:
        for line in raw_file:
            if record_counter == 1:
                titles.append(line.strip())
            elif record_counter == 2:
                authors.append(line.strip())
            elif 'Show details' in line:
                # Reset to 0 so the increment below starts the next record at 1.
                record_counter = 0
            record_counter += 1

    # The file may end right after a title line (e.g. a trailing blank line).
    # Keep both lists the same length so the DataFrame can be built:
    # drop an empty dangling title, keep a real one with empty authors.
    if len(titles) > len(authors):
        if titles[-1]:
            authors.append('')
        else:
            titles.pop()

    n_records = len(titles)
    return pd.DataFrame({
        'Title': titles,
        'Year': [year] * n_records,
        'Source': [source] * n_records,
        'Authors': authors,
        'Class': [present] * n_records,
    })

def remove_duplicates(data_frame):
    """Drop rows whose 'Title' repeats an earlier row and report the counts.

    Prints how many rows came in and how many remain, then returns the
    de-duplicated frame. The input frame itself is not modified.
    """
    total_count = len(data_frame)
    deduplicated = data_frame.drop_duplicates(subset="Title")
    remaining_count = len(deduplicated)
    print(f"Totally {total_count} papers, {remaining_count} left after remve duplicates.")
    return deduplicated

def _infer_year_from_paths(paths, default=2022):
    """Return the 4-digit year prefixing the first matching file name, else `default`."""
    import os  # local import so this helper does not rely on the notebook's import cell

    for path in paths:
        prefix = os.path.basename(path)[:4]
        if len(prefix) == 4 and prefix.isdecimal():
            return int(prefix)
    return default


def merge_data2df(oral='./data/ICLR/2022-ICLR-oral.txt', poster='./data/ICLR/2022-ICLR-poster.txt', spotlight='./data/ICLR/2022-ICLR-spotlight.txt', year=None):
    """Load the oral, spotlight and poster listings into one de-duplicated DataFrame.

    Args:
        oral: Path of the raw listing parsed with Class 'oral'.
        poster: Path of the raw listing parsed with Class 'poster'.
        spotlight: Path of the raw listing parsed with Class 'spotlight'.
        year: Value for the 'Year' column. When None (the default), the year
            is taken from a 4-digit prefix of the file names
            (e.g. '2021-ICLR-oral.txt' -> 2021), falling back to 2022 when
            no file name starts with four digits.

    Returns:
        The concatenation of the three parsed frames in the order oral,
        spotlight, poster, with repeated titles removed by
        remove_duplicates (the first occurrence in that order is kept).
    """
    # Previously the year was never forwarded, so every row was stamped with
    # format_ICLR_data_to_df's default (2022) regardless of the input files.
    if year is None:
        year = _infer_year_from_paths((oral, poster, spotlight))

    df_ICLR_oral = format_ICLR_data_to_df(oral, year=year, present='oral')
    df_ICLR_poster = format_ICLR_data_to_df(poster, year=year, present='poster')
    df_ICLR_spotlight = format_ICLR_data_to_df(spotlight, year=year, present='spotlight')

    df_ICLR = pd.concat([df_ICLR_oral, df_ICLR_spotlight, df_ICLR_poster])
    df_ICLR = remove_duplicates(df_ICLR)
    return df_ICLR

## ICLR 2021

In [2]:
# Build the ICLR 2021 table from the three raw listings.
# Year is pinned to 2021 explicitly instead of relying on the helpers'
# defaults, so the exported CSV always carries the correct conference year.
df_ICLR_21 = merge_data2df(
    oral='./data/raw/ICLR/2021-ICLR-oral.txt',
    poster='./data/raw/ICLR/2021-ICLR-poster.txt',
    spotlight='./data/raw/ICLR/2021-ICLR-spotlight.txt',
).assign(Year=2021)
df_ICLR_21[['Title', 'Year', 'Source', 'Authors', 'Class']].to_csv('./data/2021/ICLR_21.csv', index=False)

Totally 860 papers, 860 left after remve duplicates.


## ICLR 2022

In [3]:
# Build the ICLR 2022 table from the three raw listings.
df_ICLR_22 = merge_data2df(
    oral='./data/raw/ICLR/2022-ICLR-oral.txt',
    poster='./data/raw/ICLR/2022-ICLR-poster.txt',
    spotlight='./data/raw/ICLR/2022-ICLR-spotlight.txt',
)
# Lower-cased copy of every title, kept as an extra column on the frame;
# it is not among the columns exported below.
df_ICLR_22['title_lower'] = df_ICLR_22['Title'].map(str.lower)
df_ICLR_22.loc[:, ['Title', 'Year', 'Source', 'Authors', 'Class']].to_csv('./data/2022/ICLR_22.csv', index=False)

Totally 1094 papers, 1094 left after remve duplicates.
