In [6]:
import glob
import os
import os.path as osp
import re

In [79]:
tex_files = glob.glob('text/**/*.tex', recursive=True)
ref_pattern = re.compile(r'\\cite[t|p]?{([\w,]+)}')

In [118]:
def format_bib_key(bib_key):
    return ''.join([word.lower().capitalize() for word in bib_key.replace('-', '_').split('_')])

In [151]:
def extract_refs(tex_content):
    found_refs = ref_pattern.findall(tex_content)
    
    all_refs = set()
    for ref in found_refs:
        all_refs = all_refs.union(set(ref.split(',')))
        
    return all_refs

In [154]:
all_used_refs = set()

for tex_file in tex_files:
    tex_file_content = open(tex_file).read()
    all_used_refs = all_used_refs.union(extract_refs(tex_file_content))
    
all_used_refs = sorted(list(set([format_bib_key(ref) for ref in all_refs])))

In [155]:
all_used_refs

['Adams2010learning',
 'Adc',
 'Ae',
 'Ba2016layern',
 'Bae2017confidence',
 'Bamler2017perturbative',
 'Bamlerperturbative2017',
 'Battaglia2016',
 'Bengio2009',
 'Bewley2016sort',
 'Blei2003latent',
 'Bomultrasonic1971',
 'Booth1973applying',
 'Bornschein2015reweighted',
 'Bourlard1988auto',
 'Brabandere2016dfn',
 'Bravatasystematic2007',
 'Burda2015importance',
 'Burda2016',
 'Burda2016importance',
 'Burgess2019monet',
 'Chater2006probabilistic',
 'Chatterjee2018sample',
 'Chen2014fast',
 'Chen2016variational',
 'Chen2018stochastic',
 'Cheung2016gtc',
 'Cho2015unsupervised',
 'Chung2015',
 'Clevert2015elu',
 'Cohen2016group',
 'Cohen2016steerable',
 'Cremer2017reinterpreting',
 'Danesh19',
 'Dayan1995helmholtz',
 'Dayan2001',
 'Dempster1977maximum',
 'Denton2017unsupervised',
 'Dronedataset',
 'Duarte',
 'Earley1970efficient',
 'Edlerultrasonic1957',
 'Encapsule',
 'Eslami2016',
 'Eslami2016attend',
 'Eth',
 'Footdemographics2000',
 'Fort2017mcmc',
 'Gael2009',
 'Gagliardicardiac199

In [78]:
bib_files = glob.glob('text/**/*.bib', recursive=True)
print(bib_files)

['text/MOHART/library.bib', 'text/SCA/library.bib', 'text/RRWS/main.bib', 'text/SQAIR/library.bib', 'text/HART/library.bib', 'text/tighter_bounds/refs.bib', 'text/tighter_bounds/max.bib']


In [82]:
master_bib = '\n'.join(open(bib_file).read() for bib_file in bib_files)

In [98]:
bib_entries = []

for entry in master_bib.split('@'):
    entry = entry.strip()
    if entry:
#         print(entry)
        bib_entries.append('@' + entry)

In [147]:
formatted_bib_entries = dict()
ref_map = dict()

key_pattern = re.compile(r'@\w+{([\w-]+),')
for entry in bib_entries:
    found_key = key_pattern.findall(entry)
    if not found_key:
        print('None', found_key, entry)
    else:
        found_key = found_key[0]
        target_key = format_bib_key(found_key)
#         print(found_key, target_key)
        
        entry = entry.replace(found_key, target_key)
        formatted_bib_entries[target_key] = entry
        
        if found_key != target_key:
            ref_map[found_key] = target_key

None [] @mit.edu  },
title = {{Learning Chaotic Attractors by Neural Networks}},
opturl = {http://www.mitpressjournals.org/optdoi/10.1162/089976600300014971},
volume = {12},
year = {2000}
}
None [] @mit.edu},
title = {{A Fast Learning Algorithm for Deep Belief Nets}},
opturl = {http://www.mitpressjournals.org/optdoi/10.1162/neco.2006.18.7.1527},
volume = {18},
year = {2006}
}
None [] @article{
	anonymous2018auto-encoding,
	title={Auto-Encoding Sequential Monte Carlo},
	author={Anonymous},
	journal={International Conference on Learning Representations},
	year={2018}
}


In [122]:
formatted_bib_entries

{'Kosiorek17': '@article{Kosiorek17,\n  author    = {Adam R. Kosiorek and\n               Alex Bewley and\n               Ingmar Posner},\n  title     = {Hierarchical Attentive Recurrent Tracking},\n  journal   = {Neural Information Processing Systems},\n  year      = {2017},\n}',
 'Vaswani17': '@article{Vaswani17,\n  author    = {Ashish Vaswani and\n               Noam Shazeer and\n               Niki Parmar and\n               Jakob Uszkoreit and\n               Llion Jones and\n               Aidan N. Gomez and\n               Lukasz Kaiser and\n               Illia Polosukhin},\n  title     = {Attention Is All You Need},\n  journal   = {Neural Information Processing Systems},\n  year      = {2017}\n}',
 'Kahou15': '@article{Kahou15,\n  author    = {Samira Ebrahimi Kahou and\n               Vincent Michalski and\n               Roland Memisevic},\n  title     = {{RATM:} Recurrent Attentive Tracking Model},\n  journal   = {IEEE Conference on Computer Vision and Pattern Recognition Wo

In [134]:
used_bib_entries = []

for used_ref in all_used_refs:
    
    entry = formatted_bib_entries.get(used_ref, None)
    
    if entry is None:
        print('missing ref: {}'.format(used_ref))
    
    else:
        used_bib_entries.append(entry)

missing ref: Bamlerperturbative2017
missing ref: Bomultrasonic1971
missing ref: Bravatasystematic2007
missing ref: Edlerultrasonic1957
missing ref: Footdemographics2000
missing ref: Gagliardicardiac1996
missing ref: Gagliardirontgen1996
missing ref: Gagliardiultrasonography1996
missing ref: Griffithsector1974
missing ref: Harveyexercitatio1628
missing ref: Hologan
missing ref: Ilsvrc15
missing ref: Kthactivityrecognition
missing ref: Leesoncardiovascular2011
missing ref: Maaloeauxiliary2016
missing ref: Otb
missing ref: Rezendevariational2015
missing ref: Salimansmarkov2015
missing ref: Savarese2016goturn
missing ref: Sonderbyladder2016
missing ref: Sousanew2005
missing ref: Tranvariational2015
missing ref: Valmadre2017cfnn
missing ref: Vongoethewilhelm1829
missing ref: Webbintroduction2002


In [139]:
print(len(used_bib_entries))
print(len(formatted_bib_entries))
print(len(all_used_refs))

209
522
234


In [144]:
master_bib_file = '\n\n'.join(used_bib_entries)
with open('references.bib', 'w') as f:
    f.write(master_bib_file)

In [148]:
ref_map

{'MOT16': 'Mot16',
 'DroneDataset': 'Dronedataset',
 'li2019way': 'Li2019way',
 'su2016crowd': 'Su2016crowd',
 'fernando2018soft': 'Fernando2018soft',
 'vemula2018social': 'Vemula2018social',
 'choi2019learning': 'Choi2019learning',
 'zhang2019sr': 'Zhang2019sr',
 'sadeghian2019sophie': 'Sadeghian2019sophie',
 'varshneya2017human': 'Varshneya2017human',
 'scholler2019simpler': 'Scholler2019simpler',
 'UCY': 'Ucy',
 'ETH': 'Eth',
 'grewal2011kalman': 'Grewal2011kalman',
 'IGP': 'Igp',
 'rudenko2018joint': 'Rudenko2018joint',
 'yamaguchi2011you': 'Yamaguchi2011you',
 'social-lstm': 'SocialLstm',
 'sun20183dof': 'Sun20183dof',
 'bae2017confidence': 'Bae2017confidence',
 'keuper2018motion': 'Keuper2018motion',
 'kosiorek2017hierch': 'Kosiorek2017hierch',
 'lecun2015deep': 'Lecun2015deep',
 'kemp2008discovery': 'Kemp2008discovery',
 'lecun1989backpropagation': 'Lecun1989backpropagation',
 'cho2015unsupervised': 'Cho2015unsupervised',
 'kwak2015unsupervised': 'Kwak2015unsupervised',
 'xiao20

In [157]:
for tex_file in tex_files:
    content = open(tex_file).read()
    refs = extract_refs(content)
    replaced = False
    for ref in refs:
        if ref in ref_map:
            content = content.replace(ref, ref_map[ref])
            replaced = True
            
    if replaced:
        with open(tex_file, 'w') as f:
            f.write(content)