In [2]:
import urllib.request
import feedparser
import tiktoken

import pandas as pd

In [7]:
def build_arxiv_url(categories, keywords, start=0, max_results=5):
    assert len(categories) > 0
    cat = '(cat:' + categories[0]
    for c in categories[1:]:
        cat += '+OR+cat:' + c
    cat += ')'

    assert len(keywords) > 0
    kws = ''
    for i,d in enumerate(keywords):
        for k,v in d.items():
            kws += '(' + k + ':' + v[0]
            for kw in v[1:]:
                kws += '+OR+' + k + ':' + kw
            kws += ')'
        if i < len(keywords) - 1:
            kws += '+AND+'
            
    url = f'http://export.arxiv.org/api/query?search_query={cat}+AND+{kws}&start={start}&max_results={str(max_results)}'
    url = url.replace(' ', '+')
    return url

In [8]:
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

In [9]:
categories = ['cs.CL','cs.AI','cs.LG']
keywords = [{'all':['LLM','"large language model"']},{'all':['Benchmark']}]
max_results = 20

url = build_arxiv_url(categories, keywords, max_results=max_results)


In [10]:
data = urllib.request.urlopen(url)
response = feedparser.parse(data)

entries = response['entries']
print(f"Total Reponses: {response['feed']['opensearch_totalresults']}\n")
total_tokens = 0
for entry in entries:
    print(entry['title'])
    abstract_tokens = len(encoding.encode(entry['summary']))
    total_tokens += abstract_tokens

print('\n--------------')
print(f"Total Tokens: {total_tokens}")
print(f"Average Tokens: {total_tokens/len(entries)}")
print(f"Estimated Tokens: {int(total_tokens/len(entries)*int(response['feed']['opensearch_totalresults']))}")
print(f"Estimate Cost (GPT 4o Mini): {int(total_tokens/len(entries)*int(response['feed']['opensearch_totalresults']))/1000000*0.15}")

Total Reponses: 7567

TruthX: Alleviating Hallucinations by Editing Large Language Models in
  Truthful Space
GenRec: Large Language Model for Generative Recommendation
Line Goes Up? Inherent Limitations of Benchmarks for Evaluating Large
  Language Models
LawBench: Benchmarking Legal Knowledge of Large Language Models
Exploring and Benchmarking the Planning Capabilities of Large Language
  Models
Benchmarking Cognitive Biases in Large Language Models as Evaluators
Sequential Large Language Model-Based Hyper-parameter Optimization
A Fine-tuning Dataset and Benchmark for Large Language Models for
  Protein Understanding
Evaluating Interventional Reasoning Capabilities of Large Language
  Models
Benchmarking Benchmark Leakage in Large Language Models
Mathador-LM: A Dynamic Benchmark for Mathematical Reasoning on Large
  Language Models
Large Language Models as Generalizable Policies for Embodied Tasks
Instruction-Following Evaluation for Large Language Models
ERBench: An Entity-Relations

In [11]:
arxiv_df = pd.DataFrame(entries)

In [12]:
arxiv_df

Unnamed: 0,id,guidislink,link,updated,updated_parsed,published,published_parsed,title,title_detail,summary,summary_detail,authors,author_detail,author,arxiv_comment,links,arxiv_primary_category,tags
0,http://arxiv.org/abs/2402.17811v2,True,http://arxiv.org/abs/2402.17811v2,2024-06-05T11:15:04Z,"(2024, 6, 5, 11, 15, 4, 2, 157, 0)",2024-02-27T14:45:04Z,"(2024, 2, 27, 14, 45, 4, 1, 58, 0)",TruthX: Alleviating Hallucinations by Editing ...,"{'type': 'text/plain', 'language': None, 'base...",Large Language Models (LLMs) sometimes suffer ...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Shaolei Zhang'}, {'name': 'Tian Yu'...",{'name': 'Yang Feng'},Yang Feng,"Accepted to ACL 2024 main conference, Project ...","[{'href': 'http://arxiv.org/abs/2402.17811v2',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."
1,http://arxiv.org/abs/2307.00457v2,True,http://arxiv.org/abs/2307.00457v2,2023-07-04T20:04:58Z,"(2023, 7, 4, 20, 4, 58, 1, 185, 0)",2023-07-02T02:37:07Z,"(2023, 7, 2, 2, 37, 7, 6, 183, 0)",GenRec: Large Language Model for Generative Re...,"{'type': 'text/plain', 'language': None, 'base...","In recent years, large language models (LLM) h...","{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Jianchao Ji'}, {'name': 'Zelong Li'...",{'name': 'Yongfeng Zhang'},Yongfeng Zhang,,"[{'href': 'http://arxiv.org/abs/2307.00457v2',...","{'term': 'cs.IR', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.IR', 'scheme': 'http://arxiv.org..."
2,http://arxiv.org/abs/2502.14318v1,True,http://arxiv.org/abs/2502.14318v1,2025-02-20T07:13:29Z,"(2025, 2, 20, 7, 13, 29, 3, 51, 0)",2025-02-20T07:13:29Z,"(2025, 2, 20, 7, 13, 29, 3, 51, 0)",Line Goes Up? Inherent Limitations of Benchmar...,"{'type': 'text/plain', 'language': None, 'base...",Large language models (LLMs) regularly demonst...,"{'type': 'text/plain', 'language': None, 'base...",[{'name': 'James Fodor'}],{'name': 'James Fodor'},James Fodor,10 pages,"[{'href': 'http://arxiv.org/abs/2502.14318v1',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."
3,http://arxiv.org/abs/2309.16289v1,True,http://arxiv.org/abs/2309.16289v1,2023-09-28T09:35:59Z,"(2023, 9, 28, 9, 35, 59, 3, 271, 0)",2023-09-28T09:35:59Z,"(2023, 9, 28, 9, 35, 59, 3, 271, 0)",LawBench: Benchmarking Legal Knowledge of Larg...,"{'type': 'text/plain', 'language': None, 'base...",Large language models (LLMs) have demonstrated...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Zhiwei Fei'}, {'name': 'Xiaoyu Shen...",{'name': 'Jidong Ge'},Jidong Ge,,"[{'href': 'http://arxiv.org/abs/2309.16289v1',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."
4,http://arxiv.org/abs/2406.13094v2,True,http://arxiv.org/abs/2406.13094v2,2024-11-02T11:49:49Z,"(2024, 11, 2, 11, 49, 49, 5, 307, 0)",2024-06-18T22:57:06Z,"(2024, 6, 18, 22, 57, 6, 1, 170, 0)",Exploring and Benchmarking the Planning Capabi...,"{'type': 'text/plain', 'language': None, 'base...",Classical and natural language planning tasks ...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Bernd Bohnet'}, {'name': 'Azade Nov...",{'name': 'Hanie Sedghi'},Hanie Sedghi,,"[{'href': 'http://arxiv.org/abs/2406.13094v2',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."
5,http://arxiv.org/abs/2309.17012v3,True,http://arxiv.org/abs/2309.17012v3,2024-09-25T16:57:20Z,"(2024, 9, 25, 16, 57, 20, 2, 269, 0)",2023-09-29T06:53:10Z,"(2023, 9, 29, 6, 53, 10, 4, 272, 0)",Benchmarking Cognitive Biases in Large Languag...,"{'type': 'text/plain', 'language': None, 'base...",Large Language Models are cognitively biased j...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Ryan Koo'}, {'name': 'Minhwa Lee'},...",{'name': 'Dongyeop Kang'},Dongyeop Kang,"Publishsed at ACL 2024. 29 pages, 9 figures, 1...","[{'href': 'http://arxiv.org/abs/2309.17012v3',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."
6,http://arxiv.org/abs/2410.20302v3,True,http://arxiv.org/abs/2410.20302v3,2025-01-02T23:08:47Z,"(2025, 1, 2, 23, 8, 47, 3, 2, 0)",2024-10-27T00:50:30Z,"(2024, 10, 27, 0, 50, 30, 6, 301, 0)",Sequential Large Language Model-Based Hyper-pa...,"{'type': 'text/plain', 'language': None, 'base...","This study introduces SLLMBO, an innovative fr...","{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Kanan Mahammadli'}, {'name': 'Seyda...",{'name': 'Seyda Ertekin'},Seyda Ertekin,,"[{'href': 'http://arxiv.org/abs/2410.20302v3',...","{'term': 'cs.LG', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.LG', 'scheme': 'http://arxiv.org..."
7,http://arxiv.org/abs/2406.05540v2,True,http://arxiv.org/abs/2406.05540v2,2024-07-08T16:39:35Z,"(2024, 7, 8, 16, 39, 35, 0, 190, 0)",2024-06-08T18:11:30Z,"(2024, 6, 8, 18, 11, 30, 5, 160, 0)",A Fine-tuning Dataset and Benchmark for Large ...,"{'type': 'text/plain', 'language': None, 'base...",The parallels between protein sequences and na...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Yiqing Shen'}, {'name': 'Zan Chen'}...",{'name': 'Yu Guang Wang'},Yu Guang Wang,,"[{'href': 'http://arxiv.org/abs/2406.05540v2',...","{'term': 'q-bio.QM', 'scheme': 'http://arxiv.o...","[{'term': 'q-bio.QM', 'scheme': 'http://arxiv...."
8,http://arxiv.org/abs/2404.05545v2,True,http://arxiv.org/abs/2404.05545v2,2024-12-22T12:22:53Z,"(2024, 12, 22, 12, 22, 53, 6, 357, 0)",2024-04-08T14:15:56Z,"(2024, 4, 8, 14, 15, 56, 0, 99, 0)",Evaluating Interventional Reasoning Capabiliti...,"{'type': 'text/plain', 'language': None, 'base...",Numerous decision-making tasks require estimat...,"{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Tejas Kasetty'}, {'name': 'Divyat M...",{'name': 'Dhanya Sridhar'},Dhanya Sridhar,17 pages,"[{'href': 'http://arxiv.org/abs/2404.05545v2',...","{'term': 'cs.LG', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.LG', 'scheme': 'http://arxiv.org..."
9,http://arxiv.org/abs/2404.18824v1,True,http://arxiv.org/abs/2404.18824v1,2024-04-29T16:05:36Z,"(2024, 4, 29, 16, 5, 36, 0, 120, 0)",2024-04-29T16:05:36Z,"(2024, 4, 29, 16, 5, 36, 0, 120, 0)",Benchmarking Benchmark Leakage in Large Langua...,"{'type': 'text/plain', 'language': None, 'base...","Amid the expanding use of pre-training data, t...","{'type': 'text/plain', 'language': None, 'base...","[{'name': 'Ruijie Xu'}, {'name': 'Zengzhi Wang...",{'name': 'Pengfei Liu'},Pengfei Liu,30 pages; Homepage: https://gair-nlp.github.io...,"[{'href': 'http://arxiv.org/abs/2404.18824v1',...","{'term': 'cs.CL', 'scheme': 'http://arxiv.org/...","[{'term': 'cs.CL', 'scheme': 'http://arxiv.org..."


## ICML Proceedings


In [365]:
icml_url_dict = {2024:'https://proceedings.mlr.press/v235/assets/rss/feed.xml',
       2023: 'https://proceedings.mlr.press/v202/assets/rss/feed.xml',
       2022: 'https://proceedings.mlr.press/v162/assets/rss/feed.xml',
       2021: 'https://proceedings.mlr.press/v139/assets/rss/feed.xml',
       2020: 'https://proceedings.mlr.press/v119/assets/rss/feed.xml',
       2019: 'https://proceedings.mlr.press/v97/assets/rss/feed.xml',
       2018: 'https://proceedings.mlr.press/v80/assets/rss/feed.xml',
       #2017: 'https://proceedings.mlr.press/v70/assets/rss/feed.xml',
       #2016: 'https://proceedings.mlr.press/v48/assets/rss/feed.xml',
       }

In [366]:
def get_icml_papers(year):
    url = icml_url_dict[year]
    data = urllib.request.urlopen(url)
    response = feedparser.parse(data)

    icml = pd.DataFrame(response['entries'])[['title','summary','link']]
    icml['year'] = year

    is_LLM_title = icml['title'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()))
    is_LLM_abstract = icml['summary'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()))
    is_benchmark_title = icml['title'].apply(lambda x: ('benchmark' in x.lower()))
    is_benchmark_abstract = icml['summary'].apply(lambda x: ('benchmark' in x.lower()))

    is_LLM = is_LLM_title | is_LLM_abstract
    is_benchmark = is_benchmark_title | is_benchmark_abstract

    include = is_LLM & is_benchmark
    return icml[include]

In [367]:
def count_icml_papers(year):
    url = icml_url_dict[year]
    data = urllib.request.urlopen(url)
    response = feedparser.parse(data)
    icml = pd.DataFrame(response['entries'])[['title','summary','link']]
    return len(icml)

In [368]:
dfs = []
total = 0 
for year in icml_url_dict.keys():
    dfs.append(get_icml_papers(year))
    total += count_icml_papers(year)


icml_df = pd.concat(dfs)
icml_df['venue'] = 'ICML'
print(f'Selected {len(icml_df)} papers out of {total} ICML papers')

Selected 171 papers out of 9332 ICML papers


In [369]:
icml_df.groupby('year').count()

Unnamed: 0_level_0,title,summary,link,venue
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,1,1,1,1
2020,4,4,4,4
2021,3,3,3,3
2022,7,7,7,7
2023,26,26,26,26
2024,130,130,130,130


## ICLR Proceedings

In [370]:
def get_iclr_papers(year):
    
    iclr = pd.read_csv(f'ICLR_{year}.csv')[['name','abstract','virtualsite_url']]
    iclr.columns = ['title','summary','link']
    iclr['year'] = year
    
    is_LLM_title = iclr['title'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()))
    is_benchmark_title = iclr['title'].apply(lambda x: ('benchmark' in x.lower()))
    
    is_LLM_abstract = iclr['summary'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()) if isinstance(x,str) else False)
    is_benchmark_abstract = iclr['summary'].apply(lambda x: ('benchmark' in x.lower()) if isinstance(x,str) else False)

    is_LLM = is_LLM_title | is_LLM_abstract
    is_benchmark = is_benchmark_title | is_benchmark_abstract

    include = is_LLM & is_benchmark
    return iclr[include]

In [371]:
def count_iclr_papers(year):
    
    iclr = pd.read_csv(f'ICLR_{year}.csv')[['name','abstract','virtualsite_url']]
    return len(iclr)

In [372]:
dfs = []
total = 0
for year in range(2018,2025):
    dfs.append(get_iclr_papers(int(year)))
    total += count_iclr_papers(int(year))

iclr_df = pd.concat(dfs)
iclr_df['venue'] = 'ICLR'
print(f'Selected {len(iclr_df)} out of {total} total papers.')

Selected 263 out of 7359 total papers.


In [373]:
iclr_df.groupby('year').count()

Unnamed: 0_level_0,title,summary,link,venue
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3,3,3,3
2019,4,4,4,4
2020,6,6,6,6
2021,15,15,15,15
2022,22,22,22,22
2023,49,49,49,49
2024,164,164,164,164


## NeurIPS Proceedings

In [374]:
def get_neurips_papers(year):
    
    neurips = pd.read_csv(f'NeurIPS_{year}.csv')[['name','abstract','virtualsite_url']]
    neurips.columns = ['title','summary','link']
    neurips['year'] = year
    
    is_LLM_title = neurips['title'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()))
    is_benchmark_title = neurips['title'].apply(lambda x: ('benchmark' in x.lower()))

    is_LLM_abstract = neurips['summary'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()) if isinstance(x,str) else False)
    is_benchmark_abstract = neurips['summary'].apply(lambda x: ('benchmark' in x.lower()) if isinstance(x,str) else False)

    is_LLM = is_LLM_title | is_LLM_abstract
    is_benchmark = is_benchmark_title | is_benchmark_abstract

    include = is_LLM & is_benchmark
    return neurips[include]

In [375]:
def count_neurips_papers(year):
    
    neurips = pd.read_csv(f'NeurIPS_{year}.csv')[['name','abstract','virtualsite_url']]
    return len(neurips)

In [376]:
dfs = []
total = 0
for year in range(2018,2025):
    dfs.append(get_neurips_papers(int(year)))
    total += count_neurips_papers(int(year))

neurips_df = pd.concat(dfs)
neurips_df['venue'] = 'NeurIPS'
print(f'Selected {len(neurips_df)} out of {total} total papers.')

Selected 572 out of 17700 total papers.


In [377]:
neurips_df.groupby('year').count()

Unnamed: 0_level_0,title,summary,link,venue
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,2,2,2,2
2019,4,4,4,4
2020,4,4,4,4
2021,12,12,12,12
2022,36,36,36,36
2023,122,122,122,122
2024,392,392,392,392


## ACL Proceedings

In [None]:
import lxml.etree as etree
import os

In [104]:
def get_acl_papers(venue,year):
    file = f'{year}.{venue}.xml'
    if os.path.exists(file): 
        response = etree.parse(file)
    else:
        return None
    root = response.getroot()
    keys = root.findall(".//paper")
    keys = [key for key in keys if key.find(".//abstract") is not None]
    keys = [key for key in keys if key.find(".//doi") is not None]
    papers = [{'title': etree.tostring(key.find(".//title"),method='text',encoding='utf-8').decode('utf-8').strip(),
               'abstract': etree.tostring(key.find(".//abstract"),method='text',encoding='utf-8').decode('utf-8').strip(),
               'link': etree.tostring(key.find(".//doi"),method='text',encoding='utf-8').decode('utf-8').strip()} for key in keys]
    acl = pd.DataFrame(papers)

    acl.columns = ['title','summary','link']
    acl['year'] = year
    
    has_title = acl['title'].apply(lambda x: isinstance(x,str))
    has_abstract = acl['summary'].apply(lambda x: isinstance(x,str))
    #print(f'{file} Removed {sum(~has_title)} papers without title and {sum(has_title & ~has_abstract)} without abstract.')
    acl = acl[has_title & has_abstract]

    is_LLM_title = acl['title'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()))
    is_benchmark_title = acl['title'].apply(lambda x: ('benchmark' in x.lower()))

    is_LLM_abstract = acl['summary'].apply(lambda x: ('LLM' in x) or ('language model' in x.lower()) if isinstance(x,str) else False)
    is_benchmark_abstract = acl['summary'].apply(lambda x: ('benchmark' in x.lower()) if isinstance(x,str) else False)

    is_LLM = is_LLM_title | is_LLM_abstract
    is_benchmark = is_benchmark_title | is_benchmark_abstract

    include = is_LLM & is_benchmark
    return acl[include]

In [81]:
def count_acl_papers(venue,year):
    file = f'{year}.{venue}.xml'
    if os.path.exists(file): 
        response = etree.parse(file)
    else:
        return 0
    root = response.getroot()
    keys = root.findall(".//paper")
    return len(keys)

In [105]:
dfs = []
total = 0
for year in range(2020,2025):
    for venue in ['acl','emnlp','naacl']:
        dfs.append(get_acl_papers(venue, int(year)))
        total += count_acl_papers(venue,int(year))

acl_df = pd.concat(dfs)
acl_df['venue'] = 'ACL'
print(f'Selected {len(acl_df)} out of {total} total papers.')

Selected 1182 out of 11723 total papers.


In [None]:
acl_df['venue'] = acl_df['link'].apply(lambda x: x.replace('10.18653/v1/','').split('.')[1])

In [133]:
acl_excluded_venues = ['emnlp-tutorials','emnlp-demo','emnlp-demos','emnlp-industry','emnlp-srw',
                       'naacl-demo','naacl-demos','naacl-industry','naacl-srw'
                       'acl-demos ','acl-demos','acl-demo','acl-industry','acl-srw',]
acl_df = acl_df[~acl_df['venue'].isin(acl_excluded_venues)]

In [143]:
acl_df['link'] = acl_df['link'].apply(lambda x: x.replace('10.18653/v1/','https://aclanthology.org/'))

## Aggregation

In [1]:
#Total searched:

11723+17700+7359+9332

46114

In [382]:
conferences_df = pd.concat([icml_df,iclr_df,neurips_df,acl_df])
conferences_df.reset_index(inplace=True,drop=True)

In [383]:
conferences_df.shape

(2189, 5)

In [384]:
conferences_df.groupby('year').count()

Unnamed: 0_level_0,title,summary,link,venue
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,6,6,6,6
2019,8,8,8,8
2020,60,60,14,60
2021,132,132,30,132
2022,219,219,65,219
2023,464,464,197,464
2024,1300,1300,686,1300


In [385]:
print(f"Total Results: {len(conferences_df)}")

total_tokens = conferences_df.apply(lambda x: len(encoding.encode(x['summary'])), axis=1).sum()

print('\n--------------')
print(f"Total Tokens: {total_tokens}")
print(f"Average Tokens: {total_tokens/len(conferences_df)}")
print(f"Estimate Cost (GPT 4o Mini): {int(total_tokens)/1000000*0.15}")

Total Results: 2189

--------------
Total Tokens: 519527
Average Tokens: 237.33531292827774
Estimate Cost (GPT 4o Mini): 0.07792904999999999


## LLM as Inclusion Criteria

In [170]:
import openai
import os

In [169]:
def score_article(title, abstract, user_prompt):
    client = openai.OpenAI()

    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[
            {
                'role': "system",
                'content': "You are an academic assistant, filtering articles to identify which ones are relevant to a literature review.",
            },
            {
                'role': "user",
                'content': f"""{user_prompt} 
                
                **Title:** {title}

                **Abstract:** {abstract}
                """,
            },
        ],
        temperature=0,
        max_tokens=100,
        timeout=30,
    )

    text = response.choices[0].message.content
    if '**Answer:**' in text:
        answer = text.split('**Answer:**')[1].strip()
    else:
        answer = 'Not found'

    return text, answer

In [388]:
user_prompt = """Please read the paper title and abstract below, and tell me whether the paper creates and describes a new benchmark for large language models.
                  
                After reading the title and abstract, please very briefly describe whether the article implements a new benchmark for large language models. Then, on a new line write **Answer:** followed by a single word answer of 'Yes' or 'No' as to whether the article creates and describes a new benchmark."""

results = conferences_df.apply(lambda x: score_article(x['title'],x['summary'],user_prompt), axis=1)

In [389]:
conferences_df['inclusion'] = results.apply(lambda x: x[1])

In [174]:
conferences_df['inclusion'].value_counts()

inclusion
No           1197
Yes           896
Not found       2
Name: count, dtype: int64

In [391]:
user_prompt = """Please read the paper title and abstract below, and tell me the primary modality of the dataset being used.
                  
                After reading the title and abstract, please very briefly describe the primary modality of the article. Then, on a new line write **Answer:** followed by a single word answer describing the primary modality considered in the article. Your answer should be either Language, Image, Video, Audio, Multimodal or Other. Use Other only when the primary modality is not one of the previous options."""

temp = conferences_df[conferences_df['inclusion'] == 'Yes'].apply(lambda x: score_article(x['title'],x['summary'],user_prompt), axis=1)

In [392]:
conferences_df['modality'] = temp.apply(lambda x: x[1])

In [185]:
conferences_df[['inclusion','modality']].value_counts()

inclusion  modality  
Yes        Language      670
           Multimodal    135
           Image          57
           Video          17
           Audio          12
           Other           5
Name: count, dtype: int64

In [176]:
user_prompt = """Please read the paper title and abstract below, and tell me the primary focus area of the paper.
                  
                After reading the title and abstract, please very briefly describe the primary focus of the paper. Then, on a new line write **Answer:** followed by a single word answer of 'Benchmark', 'Technical', 'Methodological' or 'Other' to categorize the primary contribution."""

results = conferences_df[(conferences_df['inclusion'] == 'Yes') & (conferences_df['modality'] == 'Multimodal')].apply(lambda x: score_article(x['title'],x['summary'],user_prompt), axis=1)

In [186]:
results.apply(lambda x: x[1])

15           Benchmark
16           Benchmark
25           Benchmark
36           Technical
101     Methodological
             ...      
2004    Methodological
2007         Benchmark
2037    Methodological
2077    Methodological
2089         Benchmark
Length: 135, dtype: object

In [None]:
conferences_df['contribution'] = results.apply(lambda x: x[1])

In [199]:
conferences_df[['inclusion','modality','contribution']].value_counts()

inclusion  modality    contribution  
Yes        Language    Benchmark         430
                       Methodological    215
           Multimodal  Benchmark          92
                       Methodological     37
           Language    Technical          25
           Multimodal  Technical           5
                       Other               1
Name: count, dtype: int64

In [200]:
conferences_df.shape

(2096, 9)

In [203]:
conferences_df.drop('new_contribution',inplace=True,axis=1)

In [None]:
import pandas as pd
conferences_df = pd.read_csv('included_papers_r2.csv')

In [166]:
conferences_df = conferences_df.merge(acl_df,on=['title','summary','year'],how='left',suffixes=('', '_acl'))
conferences_df['venue'] = conferences_df.apply(lambda x: x['venue_acl'] if pd.notna(x['venue_acl']) else x['venue'], axis=1)
conferences_df = conferences_df[~(conferences_df['venue'] == 'ACL')]
conferences_df['link'] = conferences_df.apply(lambda x: x['link_acl'] if pd.notna(x['link_acl']) else x['link'], axis=1)
conferences_df = conferences_df[['title','summary','link','year','venue','inclusion','modality','contribution']]
conferences_df.to_csv('included_papers_r3.csv',index=False)

In [167]:
conferences_df[conferences_df['modality'] == 'Language']['venue'].value_counts()

venue
emnlp-main     187
NeurIPS        171
acl-long       151
ICLR            58
ICML            46
naacl-long      32
acl-short       11
naacl-main       9
acl-main         3
naacl-short      2
Name: count, dtype: int64

In [5]:
conferences_df[conferences_df['modality'] == 'Language'][['title','link']][-15:]

Unnamed: 0,title,link
2147,DialogBench: Evaluating LLMs as Human-like Dia...,
2149,CMB: A Comprehensive Medical Benchmark in Chinese,
2154,PlanRAG: A Plan-then-Retrieval Augmented Gener...,
2159,MisgenderMender: A Community-Informed Approach...,
2160,Deceptive Semantic Shortcuts on Reasoning Chai...,
2161,Leveraging LLMs for Synthesizing Training Data...,
2163,SuperGLEBer: German Language Understanding Eva...,
2165,BUST: Benchmark for the evaluation of detector...,
2171,IndiBias: A Benchmark Dataset to Measure Socia...,
2172,Struc-Bench: Are Large Language Models Good at...,


In [43]:
pd.read_csv('../data/included_papers_embeddings.csv')['inclusion'].value_counts()

inclusion
No           1248
Yes           938
Not found       2
Name: count, dtype: int64

In [41]:
pd.read_csv('../data/included_papers_embeddings.csv')['modality'].value_counts()

modality
Language      709
Multimodal    137
Image          57
Video          17
Audio          12
Other           6
Name: count, dtype: int64

In [42]:
pd.read_csv('../data/included_papers_embeddings.csv')['contribution'].value_counts()

contribution
Benchmark         450
Methodological    227
Technical          32
Name: count, dtype: int64

## Manual Filtering (for validation)

In [430]:
import textwrap
import random

random.seed(0)
random_subset = random.choices(list(range(len(conferences_df))),k=50)

subset_df = conferences_df.iloc[random_subset][['title','summary']]


In [436]:
from IPython.display import clear_output

def clear_and_print(message):
    clear_output(wait=True)
    print(message)

In [438]:
inclusions = []
modalities = []
contributions = []
for i,row in subset_df.iterrows():
    clear_output(wait=False)
    print(i)
    print(row['title'])
    print(textwrap.fill(row['summary'],80))
    print('\n')
    inclusion = input('Inclusion: Does the paper introduce a benchmark? Yes or No')
    if inclusion == 'No':
        inclusions.append('No')
        modalities.append(pd.NA)
        contributions.append(pd.NA)
    else:    
        modality = input('Modality: Language, Image, Video, Audio, Multimodal or Other')
        if modality == 'Language':
            contribution = input('Contribution: Benchmark, Technical, Methodological or Other')
            inclusions.append('Yes')
            modalities.append('Language')
            contributions.append(contribution)        
        else:
            inclusions.append('Yes')
            modalities.append(modality)
            contributions.append(pd.NA)
    print('\n')

2117
PatentEval: Understanding Errors in Patent Generation
In this work, we introduce a comprehensive error typology specifically designed
for evaluating two distinct tasks in machine-generated patent texts: claims-to-
abstract generation, and the generation of the next claim given previous ones.
We have also developed a benchmark, PatentEval, for systematically assessing
language models in this context. Our study includes a comparative analysis,
annotated by humans, of various models. These range from those specifically
adapted during training for tasks within the patent domain to the latest
general-purpose large language models (LLMs). Furthermore, we explored and
evaluated some metrics to approximate human judgments in patent text evaluation,
analyzing the extent to which these metrics align with expert assessments. These
approaches provide valuable insights into the capabilities and limitations of
current language models in the specialized field of patent text generation.






In [439]:
subset_df['inclusion'] = inclusions
subset_df['modality'] = modalities
subset_df['contribution'] = contributions

In [6]:
#subset_df.to_csv('manual_subset.csv',index=True)
subset_df = pd.read_csv('../data/manual_subset.csv',index_col=0)
conferences_df = pd.read_csv('../data/included_papers_embeddings.csv',index_col=0)

In [7]:
validation_df = subset_df.merge(conferences_df,left_on=['title','summary'],right_on=['title','summary'],suffixes=('_manual','_auto'))

In [8]:
pd.crosstab(validation_df['inclusion_manual'],validation_df['inclusion_auto'],dropna=False)

inclusion_auto,No,Yes
inclusion_manual,Unnamed: 1_level_1,Unnamed: 2_level_1
No,22,9
Yes,5,14


In [None]:
validation_df[(validation_df['inclusion_manual'] == 'Yes') & (validation_df['inclusion_auto'] == 'No')] 

Unnamed: 0,title,summary,inclusion_manual,modality_manual,contribution_manual,link,year,venue,inclusion_auto,modality_auto,contribution_auto,embedding
13,LLaMA Pro: Progressive LLaMA with Block Expansion,Humans generally acquire new skills without co...,Yes,Language,Technical,,2024,ACL,No,,,"[0.009012330323457718, -0.0016180069651454687,..."
22,Pcc-tuning: Breaking the Contrastive Learning ...,Semantic Textual Similarity (STS) constitutes ...,Yes,Language,Technical,,2024,ACL,No,,,"[0.002708073239773512, -0.013634885661303997, ..."
29,Efficient Benchmarking (of Language Models),The increasing versatility of language models ...,Yes,Language,Methodological,,2024,ACL,No,,,"[0.01993037760257721, -0.031682100147008896, 0..."
42,Do Text-to-Vis Benchmarks Test Real Use of Vis...,Large language models are able to generate cod...,Yes,Multimodal,,,2024,ACL,No,,,"[-0.018991637974977493, -0.0006011492223478854..."
46,Training Socially Aligned Language Models on S...,The goal of social alignment for AI systems is...,Yes,No,,https://iclr.cc//virtual/2024/poster/18780,2024,ICLR,No,,,"[0.015459313057363033, -0.00032864700187928975..."


In [12]:
print(validation_df['modality_manual'].value_counts())
validation_df['modality_auto'].value_counts()

modality_manual
Language      12
Multimodal     3
Image          2
No             1
Video          1
Name: count, dtype: int64


modality_auto
Language      17
Image          3
Multimodal     2
Video          1
Name: count, dtype: int64

In [465]:
pd.crosstab(validation_df['modality_manual'],validation_df['modality_auto'],dropna=False)

modality_auto,Image,Language,Multimodal,Video
modality_manual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Image,1,0,1,0
Language,0,9,0,0
Multimodal,1,0,1,0
Video,0,0,0,1


In [None]:
print(validation_df['modality_manual'].value_counts())
validation_df['modality_auto'].value_counts()

In [466]:
pd.crosstab(validation_df['contribution_manual'],validation_df['contribution_auto'],dropna=False)

contribution_auto,Benchmark,Methodological,Technical
contribution_manual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Benchmark,8,1,0


In [468]:
pd.crosstab(validation_df['contribution_auto'],validation_df['contribution_manual'],dropna=False)

contribution_manual,Benchmark,Methodological,Technical
contribution_auto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Benchmark,8,0,0
Methodological,1,0,0


In [17]:
validation_df['overall_include_manual'] = (validation_df['inclusion_manual'] == 'Yes') & (validation_df['modality_manual'] == 'Language') & (validation_df['contribution_manual'] == 'Benchmark')
validation_df['overall_include_auto'] = (validation_df['inclusion_auto'] == 'Yes') & (validation_df['modality_auto'] == 'Language') & (validation_df['contribution_auto'] == 'Benchmark')

pd.crosstab(validation_df['overall_include_manual'],validation_df['overall_include_auto'])

overall_include_auto,False,True
overall_include_manual,Unnamed: 1_level_1,Unnamed: 2_level_1
False,39,2
True,1,8


In [467]:
validation_df[(validation_df['contribution_manual'] == 'Benchmark') & (validation_df['contribution_auto'] != 'Benchmark')]

Unnamed: 0,title,summary,inclusion_manual,modality_manual,contribution_manual,link,year,venue,inclusion_auto,modality_auto,contribution_auto
32,OpenAGI: When LLM Meets Domain Experts,Human Intelligence (HI) excels at combining ba...,Yes,Language,Benchmark,https://neurips.cc//virtual/2023/poster/73509,2023,NeurIPS,Yes,Language,Methodological


## Clustering

In [8]:
def get_embedding(text):

    client = openai.OpenAI()
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )

    return response.data[0].embedding

In [10]:
conferences_df['embedding'] = conferences_df['summary'].apply(lambda x: get_embedding(x))
conferences_df['embedding'].iloc[0]

[0.03420991078019142,
 -0.008750257082283497,
 0.010895868763327599,
 0.0008428115979768336,
 0.002136621158570051,
 -0.005187224596738815,
 -0.0036589261144399643,
 0.02474045380949974,
 -0.04586093872785568,
 0.04331976920366287,
 0.029127569869160652,
 0.014240144751966,
 -0.059214070439338684,
 0.012136487290263176,
 0.021516043692827225,
 0.0157504640519619,
 0.03965185210108757,
 -0.019310500472784042,
 0.007443712092936039,
 0.005166247952729464,
 -0.029894715175032616,
 -0.025891171768307686,
 0.01710495539009571,
 -0.01941837929189205,
 0.04379923269152641,
 -0.023565761744976044,
 0.009679223410785198,
 -0.015199077315628529,
 0.03684697300195694,
 -0.03284343332052231,
 0.021420150995254517,
 -0.022355109453201294,
 0.0052741277031600475,
 -0.040442969650030136,
 -0.020197512581944466,
 0.017488528043031693,
 0.003503099549561739,
 -0.01018865592777729,
 0.009541376493871212,
 0.022882523015141487,
 -0.024344895035028458,
 -0.048929519951343536,
 -0.013568892143666744,
 0.03

In [11]:
conferences_df.to_csv('included_papers_embeddings.csv',index=False)

In [210]:
conferences_df = pd.read_csv('final_list.csv')
embeddings_df = pd.read_csv('included_papers_embeddings.csv')
embeddings_df = embeddings_df[['title','summary','embedding']]
conferences_df = conferences_df.merge(embeddings_df,on=['title','summary'],how='left',suffixes=('', '_embeddings'))

In [214]:
conferences_df['embedding'] = conferences_df['embedding'].apply(lambda x: eval(x))

In [215]:
from sklearn.cluster import KMeans
import numpy as np

matrix = np.vstack(conferences_df['embedding'].values)
n_clusters = 25

kmeans = KMeans(n_clusters = n_clusters, init='k-means++', random_state=42)
kmeans.fit(matrix)
#df['Cluster'] = kmeans.labels_



In [216]:
conferences_df['filtered_cluster'] = kmeans.labels_
conferences_df['filtered_cluster'].value_counts()

filtered_cluster
16    56
6     42
15    37
23    36
11    35
4     32
1     28
12    25
2     23
17    22
14    19
8     18
0     17
24    16
18    15
7     14
9     14
5     13
20    12
13    11
22    10
21     9
19     8
10     5
3      5
Name: count, dtype: int64

In [217]:
cluster_means = conferences_df.groupby('filtered_cluster')['embedding'].apply(lambda x: np.array([np.array(l) for l in x]).mean(axis=0))

In [246]:
cluster_labels = {0:'General Reasoning',
                  1: 'Factual Knowledge (Updates)',
                  2: 'Medicine', 
                  3: 'Vision',
                  4: 'Factual Knowledge (Hallucination)',
                  5: 'European Languages',
                  6: 'Domain Reasoning',
                  7: 'Long-context',
                  8: 'Visual Grounding',
                  9: 'Assistant Agents',
                  10: 'Jailbreaks',
                  11: 'Multiliguality', 
                  12: 'Coding',
                  13: 'Summarization',
                  14: 'Dialogue Agents',
                  15: 'Multimodal Reasoning',
                  16: 'Tool Use',
                  17: 'Question Answering',
                  18: 'General', 
                  19: 'Language-Specific',
                  20: 'Bias',
                  21: 'Updates/Editing',
                  22: 'Graphs',
                  23: 'General',
                  24: 'Trust and Safety',
                  }

In [251]:
conferences_df['cluster_label'] = conferences_df['filtered_cluster'].apply(lambda x: cluster_labels[x])
conferences_df.sort_values(by='filtered_cluster',inplace=True)
conferences_df.drop('embedding',inplace=True,axis=1)

In [252]:
conferences_df.to_csv('clustered_papers.csv',index=False)

In [245]:
cluster_id = 24
print(len(conferences_df[conferences_df['filtered_cluster'] == cluster_id]))
for i,row in conferences_df[conferences_df['filtered_cluster'] == cluster_id].iterrows():
    dist = np.linalg.norm(row['embedding']-cluster_means[cluster_id])
    print(f" ({dist:.2f}) {row['title']} {row['link']}")
    #print(row['summary'])

16
 (0.66) The WMDP Benchmark: Measuring and Reducing Malicious Use with Unlearning https://proceedings.mlr.press/v235/li24bc.html
 (0.57) Position: TrustLLM: Trustworthiness in Large Language Models https://proceedings.mlr.press/v235/huang24x.html
 (0.60) Can LLMs Keep a Secret? Testing  Privacy  Implications of Language Models  via Contextual Integrity Theory https://iclr.cc//virtual/2024/poster/18131
 (0.72) DecodingTrust: A Comprehensive Assessment of Trustworthiness in GPT Models https://neurips.cc//virtual/2023/poster/73486
 (0.56) MedSafetyBench: Evaluating and Improving the Medical Safety of Large Language Models https://neurips.cc//virtual/2024/poster/97606
 (0.62) MultiTrust: A Comprehensive Benchmark Towards Trustworthy Multimodal Large Language Models https://neurips.cc//virtual/2024/poster/97845
 (0.57) SafeWorld: Geo-Diverse Safety Alignment https://neurips.cc//virtual/2024/poster/94887
 (0.48) SG-Bench: Evaluating LLM Safety Generalization Across Diverse Tasks and Prompt

## Bibtex Processing

### Get BibTex Files

In [88]:
import requests
from bs4 import BeautifulSoup
import json
import re

In [None]:
url = "https://aclanthology.org/2024.emnlp-main.844/"

def get_acl_bibtex(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        bibtex = soup.find("pre", id="citeBibtexContent").text
        return bibtex
    else:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

get_acl_bibtex(url)

'@inproceedings{chen-etal-2024-copybench,\n    title = "{C}opy{B}ench: Measuring Literal and Non-Literal Reproduction of Copyright-Protected Text in Language Model Generation",\n    author = "Chen, Tong  and\n      Asai, Akari  and\n      Mireshghallah, Niloofar  and\n      Min, Sewon  and\n      Grimmelmann, James  and\n      Choi, Yejin  and\n      Hajishirzi, Hannaneh  and\n      Zettlemoyer, Luke  and\n      Koh, Pang Wei",\n    editor = "Al-Onaizan, Yaser  and\n      Bansal, Mohit  and\n      Chen, Yun-Nung",\n    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",\n    month = nov,\n    year = "2024",\n    address = "Miami, Florida, USA",\n    publisher = "Association for Computational Linguistics",\n    url = "https://aclanthology.org/2024.emnlp-main.844/",\n    doi = "10.18653/v1/2024.emnlp-main.844",\n    pages = "15134--15158",\n    abstract = "Evaluating the degree of reproduction of copyright-protected content by language mo

In [None]:
def get_iclr_bibtex(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")

        open_review_url = soup.find("a", title='OpenReview').get('href')

        return get_openreview_bibtex(open_review_url)

    else:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None

def get_openreview_bibtex(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        props = soup.find("script", id="__NEXT_DATA__").text
        props = json.loads(props)
        bibtex = props['props']['pageProps']['forumNote']['content']['_bibtex']
        return bibtex
    else:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None
    
get_iclr_bibtex("https://iclr.cc//virtual/2023/poster/10766")

'@inproceedings{\nwang2023on,\ntitle={On Pre-training Language Model for Antibody},\nauthor={Danqing Wang and Fei YE and Hao Zhou},\nbooktitle={The Eleventh International Conference on Learning Representations },\nyear={2023},\nurl={https://openreview.net/forum?id=zaq4LV55xHl}\n}'

In [71]:
url = "https://neurips.cc//virtual/2022/poster/55752"


def get_neurips_bibtex(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        paper_page = soup.find("a", title="Paper").get("href")
        response = requests.get(paper_page)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            bibtex = soup.find("a", string="Bibtex").get("href")
            response = requests.get('https://proceedings.neurips.cc' + bibtex)
            return response.text
        else:
            print(f"Failed to retrieve the paper page. Status code: {response.status_code}")
            return None
    else:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None

get_neurips_bibtex(url)

'@inproceedings{NEURIPS2022_e467582d,\n author = {Xu, Minghao and Zhang, Zuobai and Lu, Jiarui and Zhu, Zhaocheng and Zhang, Yangtian and Chang, Ma and Liu, Runcheng and Tang, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},\n pages = {35156--35173},\n publisher = {Curran Associates, Inc.},\n title = {PEER: A Comprehensive and Multi-Task Benchmark for Protein Sequence Understanding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/e467582d42d9c13fa9603df16f31de6d-Paper-Datasets_and_Benchmarks.pdf},\n volume = {35},\n year = {2022}\n}\n'

In [100]:
def get_icml_bibtex(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        bib_page = soup.find("code", id="bibtex").text
        return bib_page
    else:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None
    
get_icml_bibtex('https://proceedings.mlr.press/v235/zhang24ad.html')

'\n@InProceedings{pmlr-v235-zhang24ad,\n  title = \t {Revisiting Zeroth-Order Optimization for Memory-Efficient {LLM} Fine-Tuning: A Benchmark},\n  author =       {Zhang, Yihua and Li, Pingzhi and Hong, Junyuan and Li, Jiaxiang and Zhang, Yimeng and Zheng, Wenqing and Chen, Pin-Yu and Lee, Jason D. and Yin, Wotao and Hong, Mingyi and Wang, Zhangyang and Liu, Sijia and Chen, Tianlong},\n  booktitle = \t {Proceedings of the 41st International Conference on Machine Learning},\n  pages = \t {59173--59190},\n  year = \t {2024},\n  editor = \t {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix},\n  volume = \t {235},\n  series = \t {Proceedings of Machine Learning Research},\n  month = \t {21--27 Jul},\n  publisher =    {PMLR},\n  pdf = \t {https://raw.githubusercontent.com/mlresearch/v235/main/assets/zhang24ad/zhang24ad.pdf},\n  url = \t {https://proceedings.mlr.press/v235/zhang24ad.html},\n  abstra

In [None]:
import time

def get_conference_bibtex(url, venue):
    time.sleep(1)  # To avoid overwhelming the server with requests
    try:  
        if venue == 'ICLR':
            return get_iclr_bibtex(url)
        elif venue == 'NeurIPS':
            return get_neurips_bibtex(url)
        elif venue == 'ICML':
            return get_icml_bibtex(url)
        else:
            return get_acl_bibtex(url)
    except Exception as e:
        print(f"Error retrieving bibtex: {e}")
        return None

In [None]:
import pandas as pd


conferences_df = pd.read_csv('final_list.csv')

In [107]:
conferences_df['bibtex'] = conferences_df.apply(lambda x: get_conference_bibtex(x['link'],x['venue']), axis=1)

Error retrieving bibtex: 'NoneType' object has no attribute 'get'
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Error retrieving bibtex: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without re

In [119]:
temp = conferences_df[conferences_df['bibtex'].isna()].apply(lambda x: get_conference_bibtex(x['link'],x['venue']), axis=1)

Error retrieving bibtex: 'NoneType' object has no attribute 'get'


In [120]:
conferences_df['bibtex'] = conferences_df['bibtex'].fillna(temp)

In [143]:
conferences_df = conferences_df[['title','summary','link','year','venue','inclusion','modality','contribution','bibtex']]
conferences_df.to_csv('final_list_bibtex.csv',index=False)

In [147]:
with open('output.bib', 'w') as file:
    for i,row in conferences_df.iterrows():
       print(row['bibtex'], file=file)

### Clean Bibtex

In [10]:
import bibtexparser

bibtex_writer = bibtexparser.bwriter.BibTexWriter()
conferences_df['bibtex_clean'] = conferences_df['bibtex'].apply(lambda x: bibtexparser.loads(x))
for i,row in conferences_df.iterrows():
    row['bibtex_clean'].entries[0]['join_key'] = str(i)

conferences_df['bibtex_clean'] = conferences_df['bibtex_clean'].apply(lambda x: bibtex_writer.write(x))
conferences_df['bibtex_title'] = conferences_df['bibtex'].apply(lambda x: bibtexparser.loads(x).entries[0]['title'])

In [None]:
with open('../data/output.bib', 'w') as file:
    for i,row in conferences_df.iterrows():
       print(row['bibtex_clean'], file=file)

In [None]:
with open('../data/bibkeys.bib', 'r') as bib_file:
    bib_database = bibtexparser.load(bib_file)


FileNotFoundError: [Errno 2] No such file or directory: 'bibkeys.bib'

In [None]:
bibs = []
for entry in bib_database.entries:
    bibs.append({'join_key':int(entry['join_key']),'bibkey':entry['ID']})
bibs_df = pd.DataFrame(bibs)

In [None]:
bibs_df.set_index('join_key', inplace=True)

In [None]:
conferences_df = conferences_df.merge(bibs_df,left_index=True,right_index=True,how='left')
conferences_df.drop(['bibtex_clean','bibtex_title'],inplace=True,axis=1)

In [None]:
conferences_df.to_csv('final_list_bibtex.csv',index=False)

### Fixing Bibtex author and editor information for *ACL venues

In [14]:
conferences_df = pd.read_csv('../data/final_list_bibtex.csv')

In [24]:
import bibtexparser

bibtex_writer = bibtexparser.bwriter.BibTexWriter()
conferences_df['bibtex_clean'] = conferences_df['bibtex'].apply(lambda x: bibtexparser.loads(x))
for i,row in conferences_df.iterrows():
    row['bibtex_clean'].entries[0]['join_key'] = str(i)
    row['bibtex_clean'].entries[0]['ID'] = row['bibkey']
    row['bibtex_clean'].entries[0]['author'] = row['bibtex_clean'].entries[0]['author'].replace(' and ', ' and ')
    row['bibtex_clean'].entries[0]['abstract'] = ''
    row['bibtex_clean'].entries[0]['month'] = ''

conferences_df['bibtex_clean'] = conferences_df['bibtex_clean'].apply(lambda x: bibtex_writer.write(x))
conferences_df['bibtex_title'] = conferences_df['bibtex'].apply(lambda x: bibtexparser.loads(x).entries[0]['title'])

In [25]:
with open('../data/benchmark_review.bib', 'w') as file:
    for i,row in conferences_df.iterrows():
       print(row['bibtex_clean'], file=file)