In [None]:
# @title ##### Get Conference Data

"""
MAG - Microsoft Academic Graph
==============================
MAG provides data of all research papers.
""" 
import json
import requests
from pprint import pprint
import csv
import pandas as pd
from datetime import date
import ast

class MAG:
  """
  Arguments:
  ---------
    - Field : Name of the field we need to fetch data for
    - start_date : start date of data
    - end_date : end date of data
    - retry : Number of attemts to connect to MAG
  """ 
  def __init__(self, **kwargs):
    self.start_date = kwargs.get('start_date')
    self.end_date = kwargs.get('end_date')
    self.field = kwargs.get('field')
    self.retry = kwargs.get('retry')

  def getData(self):
    """
    Data from MAG.

    RETURNS
    -------
    - Return dataframe containing MAG data
    """
    from time import sleep
    retry = 0
    while(True):
        try:
          if(retry == self.retry):
            print("Failed to connect to MAG \
                      please recheck the connection")
            break
          endpoint = 'https://api.labs.cognitive.microsoft.com/'
          api_version = 'academic/v1.0/evaluate?'
          headers = {'Ocp-Apim-Subscription-Key': 'f4191fe719cf4659813a86e6830b2546',}
          dates = []
          dates.append(pd.date_range(start=self.start_date,end=self.end_date).strftime("%Y-%m-%d").tolist())
          dates = dates[0]
          count = 0
          for i in range(len(dates)-1):
            try:
              print(dates[i])
              searchstring = "And(Composite(F.FN='{}'),D=['{}','{}'])".format(self.field,dates[i],dates[i+1])
              url = endpoint + api_version + "expr=" + searchstring + "&attributes=Ti,Y,D,AA.DAfN,CC,Pt" + "&count=100000" + "&orderby=Y:asc"
              print(url)
              response  = requests.get(url, headers=headers)
              query = response.json()
              self.query = query
              self.json_to_csv()
              if count == 0:
                da = pd.read_csv('data_file.csv')
                count+=1
              else:
                df = pd.read_csv('data_file.csv')
                da = pd.read_csv('MAG.csv')
                df = df.drop('Unnamed :0',axis = 1)
                da = da.drop('Unnamed :0',axis = 1)
                da = pd.concat([da,df])
              da.to_csv('MAG.csv')
            except:
              pass
          self.data = self.basicPreprocess()
          self.data = self.grid()
            
          # pprint (self.query)
          return self.data
        except Exception as e:
                retry = retry + 1
                print("MAG Exception: ", e)
                print("Reattempting the server: ", retry)
                sleep(60)  

  def json_to_csv(self):
    json_object = json.dumps(self.query, indent = 6) 
    with open("mag.json", "w") as outfile: 
      outfile.write(json_object)
          
    with open('mag.json') as json_file: 
      data = json.load(json_file) 

    data_entities = data['entities'] 

    # now we will open a file for writing 
    data_file = open('data_file.csv', 'w') 

    # create the csv writer object 
    csv_writer = csv.writer(data_file) 

    # Counter variable used for writing 
    # headers to the CSV file 
    count = 0

    for ent in data_entities: 
      if count == 0: 

        # Writing headers of CSV file 
        header = ent.keys() 
        csv_writer.writerow(header) 
        count += 1

      # Writing data of CSV file 
      csv_writer.writerow(ent.values()) 

    data_file.close()

  def basicPreprocess(self):
    """
    Performs basic preprocessing steps.
    """
    da = pd.read_csv('data_file.csv')
    da['AA'] = da['AA'].apply(ast.literal_eval)
    df1 = pd.concat({k:pd.DataFrame(v) for k, v in da['AA'].items()})
    da = da.join(df1.reset_index(level=1, drop=True)).reset_index(drop=True)
    da = da.drop(['logprob','prob','AA','D'],axis = 1)
    da = da.rename(columns={'DAfN':'Affiliation','Y':'Year','Ti':'Title','CC':'Citation_Count','Pt':'Publication_type'})
    da.Publication_type.replace([0, 1, 2, 3, 4, 5, 6, 7, 8], ['Unknown','Journal article','Patent','Conference paper','Book chapter','Book','Book reference entry','Dataset','Repository'], inplace=True)
    da.to_csv("data_file.csv")

    return da 

  def grid(self):
    da = pd.read_csv("data_file.csv")
    grid = pd.read_csv('/content/drive/My Drive/AI Index Visualization Project/DATASETS/Conference data/Country data/Grid/grid.csv')
    grid = grid[['Name','Country']].rename(columns={'Name':'Affiliation'})
    da_country = pd.merge(da,grid,how = 'left', on = ['Affiliation'])
    da_country = da_country.fillna('')
    da_country = da_country.drop(['Unnamed: 0'],axis =1)
    da_country.to_csv('data_file.csv')
    return da_country

  

def main():
  Field = "Artificial Intelligence" #@param {type : "string"}
  Field = Field.lower()
  Start_date = "1920-01-01" #@param {type : "string"}
  End_date = "2020-07-01" #@param {type : "string"}
  a = MAG(field = Field,start_date = Start_date,end_date = End_date,retry=5)
  q = a.getData()
  q.to_csv(f"/content/drive/My Drive/AI Index Visualization Project/DATASETS/Publication data/Version 1/{Field}.csv")

if __name__ == "__main__":
    main()
    

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr=And(Composite(F.FN='artificial intelligence'),D=['1937-01-22','1937-01-23'])&attributes=Ti,Y,D,AA.DAfN,CC,Pt&count=100000&orderby=Y:asc
1937-01-23
https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr=And(Composite(F.FN='artificial intelligence'),D=['1937-01-23','1937-01-24'])&attributes=Ti,Y,D,AA.DAfN,CC,Pt&count=100000&orderby=Y:asc
1937-01-24
https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr=And(Composite(F.FN='artificial intelligence'),D=['1937-01-24','1937-01-25'])&attributes=Ti,Y,D,AA.DAfN,CC,Pt&count=100000&orderby=Y:asc
1937-01-25
https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr=And(Composite(F.FN='artificial intelligence'),D=['1937-01-25','1937-01-26'])&attributes=Ti,Y,D,AA.DAfN,CC,Pt&count=100000&orderby=Y:asc
1937-01-26
https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr=An

In [11]:
da = pd.read_csv('data_file.csv')
da.tail(30)

Unnamed: 0.1,Unnamed: 0,Title,Publication_type,Year,Citation_Count,Affiliation,Country
20415,20415,to study the knowledge attitude and practices ...,Journal article,2019,0,,
20416,20416,a feature encoding based on low space complexi...,Journal article,2019,0,Kochi University of Technology,Japan
20417,20417,a feature encoding based on low space complexi...,Journal article,2019,0,Kochi University of Technology,Japan
20418,20418,on line patient interaction method for disease...,Journal article,2019,0,,
20419,20419,on line patient interaction method for disease...,Journal article,2019,0,,
20420,20420,systems and methods for strategically acquired...,Patent,2019,0,MAGNETIC RESONANCE CREATIVE IMAGING INC,
20421,20421,reconstructing magnetic resonance images with ...,Patent,2019,0,SHENYANG NEUSOFT MEDICAL SYSTEMS CO. LTD,
20422,20422,clustering mixed data based on density peaks a...,Journal article,2019,0,,
20423,20423,clustering mixed data based on density peaks a...,Journal article,2019,0,,
20424,20424,clustering mixed data based on density peaks a...,Journal article,2019,0,,
