# Crawler for Calls for Papers (source: http://www.wikicfp.com)
    

## Example: how to retrieve the call-for-papers deadlines of 'TMA 2019'

In [74]:
# WikiCFP search URL for the query 'TMA 2019', restricted to the year 2019.
url = "http://www.wikicfp.com/cfp/servlet/tool.search?q=TMA+2019&year=2019"

## Libraries

In [538]:
import cfscrape
from lxml import etree
import pandas as pd
import urllib
import numpy as np
import matplotlib.pyplot as plt

# Show full cell contents (conference names are long). `None` means
# "no limit"; the former `-1` sentinel is deprecated/rejected by recent pandas.
pd.set_option('display.max_colwidth', None)

## Function for retrieving the CFP information

In [545]:
def retrieve_cfp_info(conference, year):
    """Search WikiCFP for a conference and return its first search hit.

    Parameters
    ----------
    conference : str
        Acronym or name used as the search query (e.g. ``'NOMS'``).
    year : int or str
        Edition year; sent as the ``year`` query parameter.

    Returns
    -------
    pandas.Series
        On success: the first search result with the fields ``retrieved``,
        ``name``, ``conference_date``, ``location``, ``hard_deadline``
        (parsed with ``pd.to_datetime``; ``NaT`` if it cannot be parsed)
        and ``requested`` (``"<conference> <year>"``).
    pandas.DataFrame
        When the result page cannot be parsed: a one-row frame with the same
        columns in which everything except ``requested`` is NaN.

    Notes
    -----
    The HTTP request itself is not guarded; errors raised by the scraper
    propagate to the caller. Only the first hit is returned, so ``retrieved``
    may differ from ``requested``.
    """
    # Function-scope imports: a bare `import urllib` does not guarantee that
    # the `urllib.parse` submodule has been loaded.
    from io import BytesIO
    from urllib.parse import quote_plus

    requested = conference + " " + str(year)

    ### Composing the URL (the conference name is URL-encoded)
    url = ("http://www.wikicfp.com/cfp/servlet/tool.search?q="
           + quote_plus(conference) + "&year=" + str(year))

    ### Instantiating the scraper and fetching the page
    scraper = cfscrape.create_scraper()
    scraped_html = scraper.get(url).content

    try:
        ### Get tables in the HTML (wrapped in a file-like object, which
        ### read_html accepts across pandas versions)
        tables = pd.read_html(BytesIO(scraped_html))

        ### Reading the dataframe from the second table of the page
        # NOTE(review): the column positions below assume a specific layout of
        # the parsed table; how read_html expands rowspan/colspan cells depends
        # on the pandas version - verify against the installed one.
        listing = tables[1]
        df = pd.DataFrame({'conference': listing[0],
                           'location': listing[1],
                           'deadline': listing[2]})

        ### Drop the first lines up to and including the 'Event' header row
        header_pos = df.index[df['conference'] == 'Event'][0]
        df = df.iloc[header_pos + 1:]

        ### Merge odd and even rows (each hit spans two consecutive rows)
        df = pd.concat([df.iloc[0::2].reset_index(drop=True),
                        df.iloc[1::2].reset_index(drop=True)],
                       sort=True, axis=1)
        df['requested'] = requested

        ### Changing the name of the columns
        df.columns = ['retrieved', 'name', 'blank', 'conference_date',
                      'location', 'hard_deadline', 'requested']

        ### Drop the blank column
        df = df.drop(columns=['blank'])

        ### Keep only the part of the first deadline before " (" - presumably
        ### a parenthesised secondary date; TODO confirm against the site
        try:
            df['hard_deadline'] = df['hard_deadline'][0].split(' (')[0]
        except (AttributeError, IndexError, KeyError, TypeError):
            # Missing or non-text deadline: leave the column untouched and let
            # to_datetime below coerce it to NaT.
            pass

        df['hard_deadline'] = pd.to_datetime(df['hard_deadline'],
                                             format='%b %d, %Y',
                                             errors='coerce')

        return df.iloc[0]

    except Exception:
        # Best-effort fallback: any parsing problem (no tables, unexpected
        # layout, no hits) yields a placeholder row so that callers looping
        # over many conferences keep going. KeyboardInterrupt/SystemExit are
        # deliberately not caught.
        return pd.DataFrame({'retrieved': np.nan,
                             'name': np.nan,
                             'conference_date': np.nan,
                             'location': np.nan,
                             'hard_deadline': np.nan,
                             'requested': [requested]})

## Example:

In [546]:
# Look up the 2018 edition of NOMS and display the first hit.
retrieve_cfp_info(conference='NOMS', year=2018)

retrieved          NOMS 2018                                  
name               Network Operations and Management Symposium
conference_date    Apr 23, 2018 - Apr 27, 2018                
location           Taipei, Taiwan, China                      
hard_deadline      2017-09-05 00:00:00                        
requested          NOMS 2018                                  
Name: 0, dtype: object


## Retrieving for a list of conferences

In [514]:


# Flagship venues (acronyms as used for the WikiCFP query).
top_conferences = ['SIGCOMM', 'USENIX ATC']





In [552]:
year = 2019
measurement_conferences = ['PAM',
                           'TMA',
                           'IMC']

print('Measurement Conferences:')

# Collect one record per conference and build the frame once
# (DataFrame.append is gone in pandas 2.0 and was quadratic anyway).
records = []
for conference in measurement_conferences:
    info = retrieve_cfp_info(conference, year)
    # retrieve_cfp_info returns a Series on success and a one-row DataFrame
    # when nothing could be parsed; normalise both to a single row.
    row = info if isinstance(info, pd.Series) else info.iloc[0]
    records.append(row.to_dict())
result = pd.DataFrame(records)

# Earliest deadline first; rows without a deadline (NaT) end up last.
result[['requested', 'retrieved', 'hard_deadline', 'name', 'conference_date', 'location']].sort_values(by=['hard_deadline']).reset_index(drop=True)

Measurement Conferences:


Unnamed: 0,requested,retrieved,hard_deadline,name,conference_date,location
0,PAM 2019,PAM 2019,2018-10-17,Passive and Active Network Measurement,"Mar 27, 2019 - Mar 29, 2019","Puerto Varas, Chile"
1,TMA 2019,TMA 2019,2019-02-15,Network Traffic Measurement and Analysis Conference,"Jun 19, 2019 - Jun 21, 2019","Paris, France"
2,IMC 2019,IMC Art & Science 2019,2019-02-15,Innovative Methodologies: International Art & Science Conference,"Apr 9, 2019 - Apr 11, 2019","University of Zagreb, Zagreb, Croatia"


In [543]:
# Set explicitly so this cell does not depend on `year` leaking in from
# whichever other cell happened to run before it.
year = 2019
security_conferences = ['IEEE S&P',
                        'USENIX SECURITY',
                        'ACM AsiaCCS',
                        'ESORICS',
                        'CSF',
                        'ACISP',
                        'SOUPS',
                        'ACM CCS',
                        'DSN',
                        'IFIPSEC',
                        'DFRWS',
                        'DIMVA',
                        'CSFW',
                        'ISC',
                        'ICICS',
                        'RAID',
                        'CNS',
                        'NSPW',
#                         'USENIX WOOT',
                        'ACSAC',
                        'SecureComm',
                        'MALWARE',
                        'EURO S&P',
                        'NDSS',
                        'ICISC',
                        'CODASPY',
                        'SAC']

print('Security Conferences:')

# Collect one record per conference and build the frame once
# (DataFrame.append is gone in pandas 2.0 and was quadratic anyway).
records = []
for conference in security_conferences:
    info = retrieve_cfp_info(conference, year)
    # retrieve_cfp_info returns a Series on success and a one-row DataFrame
    # when nothing could be parsed; normalise both to a single row.
    row = info if isinstance(info, pd.Series) else info.iloc[0]
    records.append(row.to_dict())
result = pd.DataFrame(records)

# Earliest deadline first; rows without a deadline (NaT) end up last.
result[['requested', 'retrieved', 'hard_deadline', 'name', 'conference_date', 'location']].sort_values(by=['hard_deadline']).reset_index(drop=True)


Security Conferences:


Unnamed: 0,requested,retrieved,hard_deadline,name,conference_date,location
0,NDSS 2019,NDSS 2019,2018-08-07,Network and Distributed System Security Symposium,"Feb 24, 2019 - Feb 27, 2019","San Diego, CA"
1,CODASPY 2019,CODASPY 2019,2018-09-24,ACM Conference on Data and Application Security and Privacy,"Mar 25, 2019 - Mar 27, 2019","Dallas, TX, USA"
2,USENIX SECURITY 2019,FAST 2019,2018-09-26,17th USENIX Conference on File and Storage Technologies,"Feb 25, 2019 - Feb 28, 2019","Boston, MA, USA"
3,IEEE S&P 2019,IEEE S&P 2019,2018-12-01,IEEE Symposium on Security and Privacy,"May 20, 2019 - May 22, 2019","San Francisco, CA"
4,EURO S&P 2019,IEEE S&P 2019,2018-12-01,IEEE Symposium on Security and Privacy,"May 20, 2019 - May 22, 2019","San Francisco, CA"
5,DSN 2019,DSN 2019,2018-12-07,Dependable Systems and Networks,"Jun 24, 2019 - Jun 27, 2019","Portland, Oregon, USA"
6,ISC 2019,ISC HPC 2019,2019-01-07,ISC High Performance 2019,"Jun 16, 2019 - Jun 20, 2019","Frankfurt, Germany"
7,ACM AsiaCCS 2019,ACM ASIACCS 2019,2019-01-15,The 14th ACM ASIA Conference on Computer and Communications Security,"Jul 7, 2019 - Jul 12, 2019","Auckland, New Zeland"
8,DFRWS 2019,DFRWS 2019,2019-02-03,Digital Forensic Research Workshop,"Jul 14, 2019 - Jul 17, 2019","Portland, OR"
9,ACM CCS 2019,CCS 2019,2019-02-05,26th ACM Conference on Computer and Communications Security,"Nov 11, 2019 - Nov 15, 2019",London


## Management Conferences

In [544]:
year = 2019
management_conferences = ['IFIP/IEEE IM',
                          'NOMS',
                          'CNSM',
                          'NETWORKING']

# Collect one record per conference and build the frame once
# (DataFrame.append is gone in pandas 2.0 and was quadratic anyway).
records = []
for conference in management_conferences:
    info = retrieve_cfp_info(conference, year)
    # retrieve_cfp_info returns a Series on success and a one-row DataFrame
    # when nothing could be parsed; normalise both to a single row.
    row = info if isinstance(info, pd.Series) else info.iloc[0]
    records.append(row.to_dict())
result = pd.DataFrame(records)

# Earliest deadline first; rows without a deadline (NaT) end up last.
result = result[['requested', 'retrieved', 'hard_deadline', 'name', 'conference_date', 'location']].sort_values(by=['hard_deadline']).reset_index(drop=True)
result

Unnamed: 0,requested,retrieved,hard_deadline,name,conference_date,location
0,IFIP/IEEE IM 2019,IFIP/IEEE IM 2019,2018-09-10,IFIP/IEEE International Symposium on Integrated Network Management (IM 2019),"Apr 8, 2019 - Apr 12, 2019","Washington DC, USA"
1,NETWORKING 2019,IEEE--ICCT--Ei and Scopus 2019,2019-05-01,2019 19th IEEE International Conference on Communication Technology (ICCT 2019)--Ei Compendex and Scopus,"Oct 16, 2019 - Oct 19, 2019","Xi'an, China"
2,NOMS 2019,,NaT,,,
3,CNSM 2019,,NaT,,,


In [None]:
# Render the latest `result` table as a static matplotlib figure.
# (The original referenced `df`, which only exists inside retrieve_cfp_info
# and is therefore undefined at notebook level. `plt` comes from the
# Libraries cell at the top.)
fig, ax = plt.subplots(figsize=(9, 2))
ax.axis('off')
# Cast to str so timestamps and missing values render as plain text cells.
ax.table(cellText=result.astype(str).values, colLabels=result.columns, bbox=[0, 0, 1, 1]);