In [None]:
# Mount the user's Google Drive into the Colab runtime at /content/drive;
# later cells read their input workbooks/CSVs from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# @title ##### Get Conference Data

"""
MAG - Microsoft Academic Graph
==============================
MAG provides data of all research papers.
""" 
import json
import requests
from pprint import pprint
import csv
import pandas as pd
from datetime import date
import ast

class MAG:
  """
  Fetches conference data from the MAG "evaluate" endpoint and flattens the
  JSON response into a pandas DataFrame (via intermediate files written to the
  current working directory: mag.json and data_file.csv).

  Arguments:
  ---------
    - conference : Name of the conference we need to fetch data for
    - data_fetch : What kind of Data to fetch from MAG; one of
      "Publication Count", "Fields of Study", "Author",
      "Fields wise affiliations"
    - start_date : start date of data, "YYYY-MM-DD" (required)
    - end_date : end date of data, "YYYY-MM-DD" (defaults to today)
    - retry : Number of attempts to connect to MAG (a single attempt is made
      when omitted)

  Raises:
  ------
    - ValueError : if start_date is missing, or if the conference name is not
      listed in the conferences workbook
  """

  # Input files on the mounted Drive.
  CONFERENCES_XLSX = "/content/drive/My Drive/AI Index Visualization Project/DATA HARVESTORS/MAG/Conferences/Conferences.xlsx"
  GRID_ADDRESSES_CSV = '/content/drive/My Drive/AI Index Visualization Project/DATASETS/Conference data/Country data/Grid/addresses.csv'
  GRID_CSV = '/content/drive/My Drive/AI Index Visualization Project/DATASETS/Conference data/Country data/Grid/grid.csv'

  # Seconds to wait between two attempts in getData().
  RETRY_DELAY_SECONDS = 60

  def __init__(self, **kwargs):
    start_date = kwargs.get('start_date')
    if not start_date:
      raise ValueError("start_date is required (format YYYY-MM-DD)")
    # An omitted end date means "up to today".
    end_date = kwargs.get('end_date') or date.today().strftime("%Y-%m-%d")

    self.start_date = start_date
    self.end_date = end_date
    # Year components only; used by the year-range (Y=[..]) queries.
    self.s_d = int(start_date.split('-')[0])
    self.e_d = int(end_date.split('-')[0])
    self.conference = kwargs.get('conference')
    self.data_fetch = kwargs.get('data_fetch')
    self.retry = kwargs.get('retry')

    df = pd.read_excel(self.CONFERENCES_XLSX)
    matches = df.loc[df['Conference Name'] == self.conference, 'Conference ID']
    if matches.empty:
      raise ValueError(
          "Conference {!r} was not found in {}".format(self.conference, self.CONFERENCES_XLSX))
    # Take the first match explicitly instead of calling int() on a Series.
    self.conference_id = int(matches.iloc[0])
    print(self.conference,":",self.conference_id)

  def _build_url(self):
    """
    Builds the request URL for the configured data_fetch.

    RAISES
    ------
    - ValueError if data_fetch is not one of the supported kinds
    """
    endpoint = 'https://api.labs.cognitive.microsoft.com/'
    api_version = 'academic/v1.0/evaluate?'
    # Three of the four queries filter on conference id and a year range.
    by_conference_id = "And(Composite(C.CId={}),Y=[{},{}])".format(self.conference_id, self.s_d, self.e_d)
    by_conference_name = "And(Composite(PCS.CN=='{}'),CISD=['{}','{}'])".format(self.conference, self.start_date, self.end_date)
    # data_fetch -> (search expression, requested attributes, sort order)
    queries = {
      "Fields wise affiliations": (by_conference_id, "AA.DAfN,VSN,Y,F.FN", "Y:asc"),
      "Publication Count": (by_conference_name, "CISD,PC,CIL,PCS.CN", "CISD:asc"),
      "Author": (by_conference_id, "Ti,Y,AA.DAuN,AA.DAfN,VSN", "Y:asc"),
      "Fields of Study": (by_conference_id, "Ti,VSN,Y,F.FN", "Y:asc"),
    }
    if self.data_fetch not in queries:
      raise ValueError(
          "Unsupported data_fetch {!r}; expected one of {}".format(self.data_fetch, sorted(queries)))
    searchstring, attributes, orderby = queries[self.data_fetch]
    return endpoint + api_version + "expr=" + searchstring + "&attributes=" + attributes + "&count=100000" + "&orderby=" + orderby

  def getData(self):
    """
    Data from MAG.

    Requests the data, writes mag.json / data_file.csv, preprocesses the
    result and, for "Author" and "Fields wise affiliations", joins the
    affiliation location data (see grid()).

    RETURNS
    -------
    - Return dataframe containing MAG data, or None when every attempt failed

    RAISES
    ------
    - ValueError if data_fetch is not one of the supported kinds
    """
    import os
    from time import sleep

    # Fail fast on a bad data_fetch instead of retrying an unbuildable request.
    url = self._build_url()
    # NOTE(review): a subscription key is embedded in this notebook; prefer
    # setting MAG_SUBSCRIPTION_KEY in the environment and rotate the embedded
    # key. It is kept only as a fallback so existing runs keep working.
    headers = {'Ocp-Apim-Subscription-Key': os.environ.get('MAG_SUBSCRIPTION_KEY', 'f4191fe719cf4659813a86e6830b2546'),}
    # Without an explicit retry count make one attempt rather than looping forever.
    max_attempts = 1 if self.retry is None else int(self.retry)

    for attempt in range(1, max_attempts + 1):
      try:
        response = requests.get(url, headers=headers)
        # Surface HTTP errors directly instead of failing later on a missing key.
        response.raise_for_status()
        self.query = response.json()
        self.json_to_csv()
        self.data = self.basicPreprocess()
        if self.data_fetch == "Author" or self.data_fetch == "Fields wise affiliations":
          self.data = self.grid()
        print (self.conference)
        return self.data
      except Exception as e:
        print("MAG Exception: ", e)
        # Only wait when another attempt will actually follow.
        if attempt < max_attempts:
          print("Reattempting the server: ", attempt)
          sleep(self.RETRY_DELAY_SECONDS)

    print("Failed to connect to MAG please recheck the connection")
    return None

  def json_to_csv(self):
    """
    Writes self.query to mag.json and its 'entities' list to data_file.csv.

    The CSV header is the union of the keys of all entities (in first-seen
    order), so entities that lack an attribute get an empty cell instead of
    shifting their values under the wrong column.
    """
    with open("mag.json", "w") as outfile:
      json.dump(self.query, outfile, indent = 6)

    data_entities = self.query['entities']

    fieldnames = []
    for ent in data_entities:
      for key in ent:
        if key not in fieldnames:
          fieldnames.append(key)

    # newline='' is what the csv module expects for files it writes to.
    with open('data_file.csv', 'w', newline='') as data_file:
      csv_writer = csv.DictWriter(data_file, fieldnames=fieldnames)
      if fieldnames:
        csv_writer.writeheader()
      csv_writer.writerows(data_entities)

  @staticmethod
  def _explode_records(frame, column):
    """
    Expands a column holding the text form of a list of dicts into one row
    per dict, with the dict keys as new columns. Rows are renumbered from 0.
    """
    # Cells that are not strings (e.g. NaN for a missing attribute) are
    # treated as an empty list so they cannot break the literal parsing.
    frame[column] = frame[column].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else [])
    expanded = pd.concat({k: pd.DataFrame(v) for k, v in frame[column].items()})
    return frame.join(expanded.reset_index(level=1, drop=True)).reset_index(drop=True)

  def basicPreprocess(self):
    """
    Performs basic preprocessing steps.

    Reads data_file.csv, reshapes and renames its columns according to
    data_fetch, writes the result back to data_file.csv and returns it.

    RETURNS
    -------
    - The preprocessed dataframe
    """
    da = pd.read_csv('data_file.csv')
    if self.data_fetch == "Fields wise affiliations":
      da = self._explode_records(da, 'F')
      da = da.drop(['logprob','prob','F'],axis = 1)
      da = da.rename(columns={'FN':'Field_of_study','VSN':'Conference','Y':'Date'})
      da = self._explode_records(da, 'AA')
      da = da.drop(['AA'],axis = 1)
      da = da.rename(columns={'DAfN':'Affiliation'})
      da = da.dropna()
      # After the count only 'Conference' is left as a value column; it now
      # holds the number of rows per (Date, Field_of_study, Affiliation).
      da = da.groupby(['Date','Field_of_study','Affiliation']).agg('count')
      da = da.rename(columns={'Conference':'Publication_Count'})
      da['Conference'] = f'{self.conference}'

    elif self.data_fetch == "Author":
      da = self._explode_records(da, 'AA')
      da = da.drop(['logprob','prob','AA'],axis = 1)
      da = da.rename(columns={'DAfN':'Affiliation','DAuN':'Author','VSN':'Conference','Y':'Date'})

    elif self.data_fetch == "Publication Count":
      da = da.drop(['logprob','prob','PCS'],axis = 1)
      # The requested attribute is CISD, so that is the column to rename.
      da = da.rename(columns={'CIL':'Location','CISD':'Date','PC':'Publication_Count'})
      da['Conference_Name'] = f'{self.conference}'

    elif self.data_fetch == "Fields of Study":
      da = self._explode_records(da, 'F')
      da = da.drop(['logprob','prob','F'],axis = 1)
      da = da.rename(columns={'FN':'Field_of_study','VSN':'Conference','Y':'Date'})

    # The index is written on purpose: for "Fields wise affiliations" it holds
    # the group keys that grid() reads back as columns.
    da.to_csv("data_file.csv")

    return da

  def grid(self):
    """
    Adds location columns to the preprocessed data.

    Reads data_file.csv, left-joins it on 'Affiliation' with the merged
    address/grid tables, keeps only rows that received a 'lat' value, writes
    the result back to data_file.csv and returns it.
    """
    da = pd.read_csv("data_file.csv")
    address = pd.read_csv(self.GRID_ADDRESSES_CSV)
    grid = pd.read_csv(self.GRID_CSV)
    address = address.rename(columns={'grid_id':'ID'})
    locations = pd.merge(address,grid,how = 'inner', on = ['ID'])
    locations = locations[['Name','City','State','Country','lat','lng']]
    locations = locations.rename(columns={'Name':'Affiliation'})
    located = pd.merge(da,locations,how = 'left', on = ['Affiliation'])
    # Missing values become '' so unmatched affiliations can be filtered on 'lat'.
    located = located.fillna('')
    located = located[located['lat']!='']
    located.to_csv('data_file.csv')
    return located

  

def main():
  """
  Fetches the data selected in the Colab form for one conference and saves
  it as "<Conference>.csv" in the current working directory.

  Nothing is written when getData() returns no data (all attempts failed).
  """
  Conference = "EMNLP" #@param {type : "string"}
  Start_date = "1980-01-01" #@param {type : "string"}
  End_date = "2020-01-01" #@param {type : "string"}
  Data_to_fetch = "Fields of Study" #@param ["Publication Count","Fields of Study", "Author","Fields wise affiliations"]
  a = MAG(conference = Conference,start_date = Start_date,end_date = End_date,data_fetch=Data_to_fetch,retry=1)
  q = a.getData()
  # getData() gives back None when it could not fetch anything; calling
  # .to_csv on that would raise AttributeError, so stop with a clear message.
  if q is None:
    print(f"No data retrieved for {Conference}; nothing was saved.")
    return
  q.to_csv(f"{Conference}.csv")

# Run the fetch when this cell/script is executed directly (not when imported).
if __name__ == "__main__":
    main()
    

AAAI : 1184914352
AAAI
AAMAS : 1168671587
AAMAS
ACL : 1188739475
ACL
CP : 1201491352
CP
CVPR : 1158167855
CVPR
ECCV : 1124077590
ECCV
ICAPS : 1163360771
ICAPS
ICASSP : 1121227772
ICASSP
ICCV : 1164975091
ICCV
ICLR : 2584161585
ICLR
ICML : 1180662882
ICML
ICRA : 1163902177
ICRA
IJCAI : 1203999783
IJCAI
IROS : 1143279144
IROS
KDD : 1130985203
KDD
KR : 1155137614
KR
NeurIPS : 1127325140
NeurIPS
UAI : 1204606053
UAI
AIES : 2898559599
AIES
CHI : 1163450153
CHI
CLEO : 2623670637
CLEO
ICC : 1130451194
ICC
NAACL : 1173951661
NAACL
IGARSS : 1195013065
IGARSS
MSE : 1147218201
MSE
OFC : 2621451516
OFC
CDC : 1198780418
CDC
EMBC : 2232857946
EMBC
WWW : 1135342153
WWW
EQEC : 2755266407
EQEC
AISTATS : 2622962978
AISTATS
SOCO : 1123077274
SOCO
GLOBECOM : 1131420910
GLOBECOM
INTERSPEECH : 1177287137
INTERSPEECH
ACC : 1190039108
ACC
IJCNLP : 1126706392
IJCNLP
ISBI : 1187587159
ISBI
HICSS : 1164519180
HICSS
IOT : 1168863100
IOT
MICCAI : 1129324708
MICCAI
ISAP : 2623387249
ISAP
Big Data : 2623113034
Big D

In [None]:
import pandas as pd
# Load the conference workbook (the same file MAG.__init__ reads) so the
# available conference names can be inspected in the cells below.
df = pd.read_excel("/content/drive/My Drive/AI Index Visualization Project/DATA HARVESTORS/MAG/Conferences/Conferences.xlsx")
# Optional (disabled): restrict the table to its last 88 rows.
# df = df.tail(88)

In [None]:
# Collect every value of the 'Conference Name' column into a plain Python list.
l = df['Conference Name'].tolist()

In [None]:
# Display the list of conference names as the cell output.
l

['AAAI',
 'AAMAS',
 'ACL',
 'CP',
 'CVPR',
 'ECCV',
 'ICAPS',
 'ICASSP',
 'ICCV',
 'ICLR',
 'ICML',
 'ICRA',
 'IJCAI',
 'IROS',
 'KDD',
 'KR',
 'NeurIPS',
 'UAI',
 'AIES',
 'CHI',
 'CLEO',
 'ICC',
 'NAACL',
 'IGARSS',
 'MSE',
 'OFC',
 'CDC',
 'EMBC',
 'WWW',
 'EQEC',
 'AISTATS',
 'SOCO',
 'GLOBECOM',
 'INTERSPEECH',
 'ACC',
 'IJCNLP',
 'ISBI',
 'HICSS',
 'IOT',
 'MICCAI',
 'ISAP',
 'Big Data',
 'APEC',
 'SIGCSE',
 'SIGMOD',
 'INTERACT',
 'ICIP',
 'ISIT',
 'SIGIR',
 'WCNC',
 'SODA',
 'ICAI',
 'LREC',
 'WACV',
 'ECCE',
 'DATE',
 'MEMS',
 'S&P',
 'VTC',
 'ISCAS',
 'ICDE',
 'VLDB',
 'ISSCC',
 'ECC',
 'IJCNN',
 'IRMMW-THz',
 'TRANSDUCERS',
 'CCC',
 'ICME',
 'IUS',
 'IECON',
 'HRI',
 'CSL',
 'EuCAP',
 'ICSE',
 'SENSORS',
 'STOC',
 'PES',
 'MM',
 'DAC',
 'ITSC',
 'ASILOMAR',
 'CIKM',
 'VR',
 'NSS/MIC',
 'ECOC',
 'INFOCOM',
 'AeroConf',
 'PVSC',
 'ICEMS',
 'HiPC',
 'WSDM',
 'GECCO',
 'IMS',
 'CCS',
 'SMC',
 'EIConRus',
 'EUSIPCO',
 'SIGGRAPH',
 'PowerTech',
 'SAC',
 'ASPLOS',
 'ADHOCNETS',
 'I