# In [None]:
# @title ##### Get Conference Data

"""
MAG - Microsoft Academic Graph
==============================
MAG provides data on research papers; this cell fetches a conference's records from it.
""" 
import json
import requests
from pprint import pprint
import csv
import pandas as pd
from datetime import date
import ast

class MAG:
  """
  Fetch conference data from the Microsoft Academic Graph (MAG) evaluate API
  and reshape it into a pandas DataFrame.

  Arguments:
  ---------
    - conference : Name of the conference we need to fetch data for; it must
                   match exactly one 'Conference Name' row of the workbook at
                   CONFERENCES_PATH
    - data_fetch : What kind of Data to fetch from MAG
                   ("Publication Count", "Fields of Study" or "Author")
    - start_date : start date of data, "YYYY-MM-DD" (required)
    - end_date : end date of data, "YYYY-MM-DD"; defaults to today
    - retry : Number of attempts to connect to MAG; defaults to DEFAULT_RETRY

  Note: the dates are stored (start_date/end_date, plus their years in
  s_d/e_d) but the request built by getData does not filter on them.
  """

  # Input files; class attributes so they can be overridden without editing
  # the methods.
  CONFERENCES_PATH = "/content/drive/My Drive/AI Index Visualization Project/DATA HARVESTORS/MAG/Conferences/Conferences.xlsx"
  ADDRESSES_PATH = '/content/drive/My Drive/AI Index Visualization Project/DATASETS/Conference data/Country data/Grid/addresses.csv'
  GRID_PATH = '/content/drive/My Drive/AI Index Visualization Project/DATASETS/Conference data/Country data/Grid/grid.csv'

  ENDPOINT = 'https://api.labs.cognitive.microsoft.com/'
  API_VERSION = 'academic/v1.0/evaluate?'
  # NOTE(review): a subscription key should not live in source control; it is
  # kept here only so existing behaviour is unchanged. Consider loading it
  # from configuration and rotating this value.
  SUBSCRIPTION_KEY = 'f4191fe719cf4659813a86e6830b2546'

  DEFAULT_RETRY = 5
  RETRY_DELAY_SECONDS = 60

  def __init__(self, **kwargs):
    """
    Store the request settings and resolve the conference name to its ID.

    RAISES
    ------
    - ValueError if the conference name matches zero or several rows of the
      conferences workbook.
    """
    df = pd.read_excel(self.CONFERENCES_PATH)
    self.start_date = kwargs.get('start_date')
    # Fall back to today's date when no end date is supplied.
    self.end_date = kwargs.get('end_date') or date.today().strftime("%Y-%m-%d")
    # Only the year component is kept in s_d / e_d.
    self.s_d = int(self.start_date.split('-')[0])
    self.e_d = int(self.end_date.split('-')[0])
    self.conference = kwargs.get('conference')
    self.data_fetch = kwargs.get('data_fetch')
    retry = kwargs.get('retry')
    # A missing retry count would otherwise make getData loop forever.
    self.retry = self.DEFAULT_RETRY if retry is None else retry

    matches = df.loc[df['Conference Name'] == self.conference, 'Conference ID']
    if len(matches) != 1:
      raise ValueError(
          "Expected exactly one conference named {!r} in {}, found {}".format(
              self.conference, self.CONFERENCES_PATH, len(matches)))
    # int(Series) is deprecated in pandas; convert the single element instead.
    self.conference_id = int(matches.iloc[0])
    print(self.conference_id)

  def getData(self):
    """
    Data from MAG.

    Each attempt downloads the records, writes them to disk (json_to_csv),
    preprocesses them (basicPreprocess) and, for "Author" / "Fields of Study",
    attaches location data (grid). A failed attempt is retried after
    RETRY_DELAY_SECONDS, up to self.retry attempts in total.

    RETURNS
    -------
    - Return dataframe containing MAG data, or None when every attempt failed
    """
    from time import sleep  # local import: `time` is not imported at file level

    headers = {'Ocp-Apim-Subscription-Key': self.SUBSCRIPTION_KEY,}
    searchstring = "FN={}".format(self.conference_id)
    # NOTE(review): "attributes" is sent empty although basicPreprocess expects
    # columns such as AA, F, VSN and Y -- confirm what the service returns.
    url = self.ENDPOINT + self.API_VERSION + "expr=" + searchstring + "&attributes=" + "&count=100000" + "&orderby=Y:asc"

    attempts = 0
    while attempts < self.retry:
      try:
        response = requests.get(url, headers=headers)
        # Report HTTP errors as such instead of as a later KeyError on
        # the missing 'entities' key.
        response.raise_for_status()
        self.query = response.json()
        self.json_to_csv()
        self.data = self.basicPreprocess()
        if self.data_fetch in ("Author", "Fields of Study"):
          self.data = self.grid()
        return self.data
      except Exception as e:
        # NOTE(review): this also retries failures raised while processing
        # the response, not only connection problems; kept as best-effort.
        attempts += 1
        print("MAG Exception: ", e)
        if attempts < self.retry:
          # Wait only when another attempt will actually follow.
          print("Reattempting the server: ", attempts)
          sleep(self.RETRY_DELAY_SECONDS)

    print("Failed to connect to MAG, please recheck the connection")
    return None

  def json_to_csv(self):
    """
    Write self.query to "mag.json" and its 'entities' list to "data_file.csv".

    The CSV header is the union of all entity keys in first-seen order and
    every value is written under its own key, so entities whose keys are
    ordered differently or partly missing stay aligned (missing -> empty).
    Nested values are written as their Python string form; basicPreprocess
    parses them back with ast.literal_eval.
    """
    with open("mag.json", "w") as outfile:
      json.dump(self.query, outfile, indent=6)

    data_entities = self.query['entities']
    fieldnames = list(dict.fromkeys(key for ent in data_entities for key in ent))

    # newline='' is what the csv module requires to avoid doubled line
    # endings on some platforms.
    with open('data_file.csv', 'w', newline='') as data_file:
      if fieldnames:
        csv_writer = csv.DictWriter(data_file, fieldnames=fieldnames, restval='')
        csv_writer.writeheader()
        csv_writer.writerows(data_entities)

  @staticmethod
  def _explode_records(da, column):
    """Parse `column` (stringified lists of dicts) and join one row per dict onto `da`."""
    da[column] = da[column].apply(ast.literal_eval)
    df1 = pd.concat({k: pd.DataFrame(v) for k, v in da[column].items()})
    return da.join(df1.reset_index(level=1, drop=True)).reset_index(drop=True)

  def basicPreprocess(self):
    """
    Performs basic preprocessing steps.

    Reads "data_file.csv", reshapes it according to self.data_fetch, writes
    the result back to "data_file.csv" (index included) and returns it. An
    unrecognised data_fetch value leaves the data unchanged.

    - "Fields of Study"  : one row per (field, author entry), then counts per
                           Date / Field_of_study / Affiliation
    - "Author"           : one row per author entry with Affiliation, Author,
                           Conference and Date columns
    - "Publication Count": Location, Date and Publication_Count columns plus
                           the conference name
    """
    da = pd.read_csv('data_file.csv')
    if self.data_fetch == "Fields of Study":
      da = self._explode_records(da, 'F')
      da = da.drop(['logprob','prob','F'],axis = 1)
      da = da.rename(columns={'FN':'Field_of_study','VSN':'Conference','Y':'Date'})
      da = self._explode_records(da, 'AA')
      da = da.drop(['AA'],axis = 1)
      da = da.rename(columns={'DAfN':'Affiliation'})
      da = da.dropna()
      da = da.groupby(['Date','Field_of_study','Affiliation']).agg('count')
      # After the count, the old 'Conference' column holds the group size.
      da = da.rename(columns={'Conference':'Publication_Count'})
      da['Conference'] = f'{self.conference}'

    elif self.data_fetch == "Author":
      da = self._explode_records(da, 'AA')
      da = da.drop(['logprob','prob','AA'],axis = 1)
      da = da.rename(columns={'DAfN':'Affiliation','DAuN':'Author','VSN':'Conference','Y':'Date'})

    elif self.data_fetch == "Publication Count":
      da = da.drop(['logprob','prob','PCS'],axis = 1)
      da = da.rename(columns={'CIL':'Location','CSID':'Date','PC':'Publication_Count'})
      da['Conference_Name'] = f'{self.conference}'

    # The index is written too: for "Fields of Study" it carries the group
    # keys that grid() reads back as columns.
    da.to_csv("data_file.csv")

    return da

  def grid(self):
    """
    Attach location columns to the preprocessed data by affiliation name.

    Merges "data_file.csv" with the address/grid tables on 'Affiliation',
    keeps only rows for which a latitude was found, writes the result back to
    "data_file.csv" and returns it.
    """
    da = pd.read_csv("data_file.csv")
    address = pd.read_csv(self.ADDRESSES_PATH)
    grid = pd.read_csv(self.GRID_PATH)
    address = address.rename(columns={'grid_id':'ID'})
    c = pd.merge(address,grid,how = 'inner', on = ['ID'])
    c = c[['Name','City','State','Country','lat','lng']]
    c = c.rename(columns={'Name':'Affiliation'})
    cc = pd.merge(da,c,how = 'left', on = ['Affiliation'])
    # Unmatched affiliations have no 'lat'; blank them out and drop those rows.
    cc = cc.fillna('')
    cc = cc[cc['lat']!='']
    cc.to_csv('data_file.csv')
    return cc

  

def main():
  """
  Fetch the data selected in the form fields below and save it as
  "<Conference>.csv" in the working directory.

  Nothing is written when getData() gives up and returns None.
  """
  Conference = "AAAI" #@param {type : "string"}
  Start_date = "1980-01-01" #@param {type : "string"}
  End_date = "2020-01-01" #@param {type : "string"}
  Data_to_fetch = "Publication Count" #@param ["Publication Count","Fields of Study", "Author"]
  a = MAG(conference = Conference,start_date = Start_date,end_date = End_date,data_fetch=Data_to_fetch,retry=5)
  q = a.getData()
  # getData() returns None once its retries are exhausted; calling to_csv on
  # that would fail with an unrelated AttributeError.
  if q is None:
    print(f"No data fetched for {Conference}; nothing was written.")
    return
  q.to_csv(f"{Conference}.csv")

# Run the fetch only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
    