# PYTHON WRAPPER FOR NSF AWARD SEARCH API

https://www.research.gov/common/webapi/awardapisearch-v1.htm

Per NSF's notice, the API may experience downtime or service disruptions, typically on weekends from 10 PM Friday through 12 PM Sunday. So, if you get an error like:
```python
HTTPError: HTTP Error 503: Service Temporarily Unavailable
```

it is most likely because the server is undergoing maintenance.

Record Offset (required: No; parameter name: `offset`): Enter the record offset (always starts with 1). This is used in conjunction with results per page to fetch large data sets in chunks. For example, if a search produces 82 results and the results per page is set to 25, this will generate 4 pages: 3 pages will have 25 results and the last page will have 7 results. The record offset values will be:
Page 1: offset=1
Page 2: offset=26
Page 3: offset=51
Page 4: offset=76


Check the website linked above for more information.

The results are saved in a SQLite database for future analysis.

In [1]:
import urllib.request
import urllib.parse
import json
import sqlite3
import glob

## REQUEST NSF API

In [124]:
# Demonstrate the shape of the API response for an award id that exists and
# for one that does not. Each response is opened in a `with` block so the
# underlying HTTP connection is closed as soon as the body has been read.
url = 'http://api.nsf.gov/services/v1/awards.json?'
for label, award_id in (('existing', 1656688), ('Non-existing', 1)):
    params = urllib.parse.urlencode({'id': award_id})
    with urllib.request.urlopen('{}{}'.format(url, params)) as response:
        # Fall back to UTF-8 when the server does not declare a charset.
        charset = response.info().get_param('charset') or 'utf-8'
        data_r = json.loads(response.read().decode(charset))
    print('Example of {} award: \n {} \n \n'.format(label, data_r))



Example of existing award: 
 {'response': {'award': [{'piLastName': 'Boyd', 'awardeeCity': 'Newark', 'title': 'Regulatory Pathways of the Osmotic Stress Response in Bacteria', 'id': '1656688', 'awardeeName': 'University of Delaware', 'date': '03/12/2017', 'awardeeStateCode': 'DE', 'fundsObligatedAmt': '237824', 'publicAccessMandate': '1', 'agency': 'NSF', 'piFirstName': 'Fidelma'}]}} 
 

Example of Non-existing award: 
 {'response': {'award': []}} 
 



In [None]:
{'response': {'serviceNotification': [{'notificationType': 'ERROR', 'notificationCode': 'AwardAPI-002', 'notificationMessage': 'Invalid parameter(s) sent in the request. Invalid Parameter(s) {pageStart}'}]}}

In [3]:
## Save data in SQLite database

In [4]:
import time
import numpy as np
from urllib.parse import quote
import re
 # delays for 5 seconds


    
class AwardScraper():
    """Fetch award records from the NSF award API and store them in SQLite.

    Typical use: build an instance, call ``connect2_db()`` once, then
    repeatedly call ``fetch_info()`` and hand each returned page to
    ``scan_page()``.

    Attributes:
        url: Base URL of the API endpoint; the encoded query string is
            appended to it verbatim.
        folder: Prefix concatenated in front of ``dbname`` to build the
            database path (it must already end with a path separator, or be
            empty).
        dbname: File name of the SQLite database.
        table_name: Name of the table that awards are read from/written to.
        overwrite_db: Stored for interface compatibility only. Existing data
            is never deleted; the table is simply created when missing.
        failed_id: Ids of the awards that could not be looked up or saved.
        n_fail: Running count of awards that could not be saved.
        n_awards_in_page: Size of the page most recently passed to
            ``scan_page()``.
        conn, c: SQLite connection and cursor, set by ``connect2_db()``.
    """

    # Award fields copied from every API record; absent ones are stored as ''.
    _CATEGORIES = ('piLastName', 'awardeeCity', 'title', 'id', 'awardeeName',
                   'date', 'awardeeStateCode', 'fundsObligatedAmt',
                   'publicAccessMandate', 'agency', 'piFirstName')

    def __init__(self, url, folder, dbname, table_name, overwrite_db):
        self.url = url
        self.table_name = table_name
        self.folder = folder
        self.dbname = dbname
        self.overwrite_db = overwrite_db
        self.failed_id = []
        self.n_fail = 0
        self.n_awards_in_page = 0

    def _quoted_table(self):
        """Return ``table_name`` quoted as an SQL identifier.

        Identifiers cannot be bound as query parameters, so the name is
        wrapped in double quotes with embedded quotes doubled.
        """
        return '"{}"'.format(str(self.table_name).replace('"', '""'))

    def clean_str(self, s):
        """Collapse every run of characters outside ``/A-Za-z0-9-%`` to one space."""
        clean = re.sub('[^/A-Za-z0-9-%]+', ' ', s)
        return clean

    def connect2_db(self):
        """Open the database and make sure the awards table exists.

        Sets ``self.conn`` and ``self.c``. The table is created only when it
        is not already present, so calling this on an existing database keeps
        its rows. A failure to create the table is reported on stdout rather
        than raised (best effort, as before).
        """
        db_path = self.folder + self.dbname
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()
        try:
            # Remember whether the table was already there so that the
            # "Created Table" message is only printed for a fresh table.
            self.c.execute(
                "SELECT 1 FROM sqlite_master "
                "WHERE type = 'table' AND lower(name) = lower(?)",
                (str(self.table_name),))
            table_existed = self.c.fetchone() is not None
            # Column names are kept exactly as in the original schema.
            self.c.execute(
                "CREATE TABLE IF NOT EXISTS {} (id, agency, awardeeCity, "
                "awardeeName, awardeeStateCode, fundsObligatedAmount, "
                "piFirstname, piLastName, publicAccessMandate, date, title, "
                "topic)".format(self._quoted_table()))
            # Save (commit) the changes
            self.conn.commit()
        except sqlite3.Error:
            print('Unable to create table')
            return
        if not table_existed:
            print('Created Table: {} in database {}'.format(self.table_name, db_path))

    def fetch_info(self, request):
        """Query the API and return the list of awards in the response.

        Args:
            request: dict of query parameters, e.g. ``{'dateStart': value}``.

        Returns:
            The list stored under ``response.award``. When that key is
            absent, the ``serviceNotification`` entry (if any) is printed and
            an empty list is returned.

        Raises:
            urllib.error.URLError: if the HTTP request fails (includes
                ``HTTPError`` such as 503).
        """
        params = urllib.parse.urlencode(request)
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen('{}{}'.format(self.url, params), timeout=90) as response:
            charset = response.info().get_param('charset') or 'utf-8'
            payload = json.loads(response.read().decode(charset))
        body = payload['response']
        if 'award' in body:
            awards = body['award']
            print('Number of awards: {}'.format(len(awards)))
            return awards
        print(body.get('serviceNotification'))
        return []

    def clean_award_data(self, info):
        """Fill in missing award fields and sanitise every field in place.

        Missing fields (and ``None`` values) become ``''``; non-string values
        are converted with ``str()`` before being passed to ``clean_str()``.

        Returns:
            The same ``info`` dict, mutated.
        """
        for category in self._CATEGORIES:
            value = info.get(category)
            text = '' if value is None else str(value)
            info[category] = self.clean_str(text)
        return info

    def save_data2db(self, item):
        """Insert one award unless a row with the same id already exists.

        Values are bound as query parameters instead of being formatted into
        the SQL text.

        Args:
            item: award dict; must contain ``'id'``. It is cleaned in place
                by ``clean_award_data()`` before insertion.

        Returns:
            ``True`` if the row was inserted, ``False`` if the lookup or the
            insert failed (the id is then appended to ``failed_id``), and
            ``None`` if the award was already stored.

        Raises:
            KeyError: if ``item`` has no ``'id'`` key.
        """
        award_id = item['id']
        table = self._quoted_table()
        try:
            self.c.execute(
                "SELECT id FROM {} WHERE id = ?;".format(table),
                (str(award_id),))
            already_stored = self.c.fetchone() is not None
        except sqlite3.Error:
            print('Unable to execute SQL query: SELECT')
            self.failed_id.append(award_id)
            return False
        if already_stored:
            return None

        item = self.clean_award_data(item)
        # Positional order matches the column order of the table; the last
        # column (topic) is stored empty.
        row = (item['id'], item['agency'], item['awardeeCity'],
               item['awardeeName'], item['awardeeStateCode'],
               item['fundsObligatedAmt'], item['piFirstName'],
               item['piLastName'], item['publicAccessMandate'],
               item['date'], item['title'], "")
        placeholders = ', '.join('?' * len(row))
        try:
            self.c.execute(
                "INSERT INTO {} VALUES ({});".format(table, placeholders), row)
            self.conn.commit()
        except sqlite3.Error:
            print('Unable to save data on database')
            self.failed_id.append(award_id)
            return False
        return True

    def scan_page(self, page):
        """Save every award of ``page`` and update the counters.

        ``n_fail`` grows by the number of awards whose save returned
        ``False``; ``n_awards_in_page`` is set to ``len(page)``.
        """
        for award in page:
            # Only an explicit False counts as a failure; None means the
            # award was already in the table.
            if self.save_data2db(award) is False:
                self.n_fail = self.n_fail + 1
        self.n_awards_in_page = len(page)

                
                
# --- Scrape configuration -------------------------------------------------
# NOTE: `sleep_time` and `flder` are assigned here but never read by the
# statements below; the folder argument is passed as a literal ''.
sleep_time = 0.01
url = 'http://api.nsf.gov/services/v1/awards.json?'
date = '03/16/1980'
flder = ''
table_name = "research_awards"
dbname = 'research_grants.db'
overwrite_db = True

Scraper = AwardScraper(url, '', dbname, table_name, overwrite_db)
Scraper.connect2_db()

# --- Paging state ---------------------------------------------------------
n_awards_per_page = 25
n_awards = n_awards_per_page
offset = 9528

# Request one page per iteration, advancing the record offset by a full page
# each time, and stop at the first request that yields no awards.
while True:
    # The label reads "Page number" but the value printed is the record offset.
    print('Page number: {}'.format(offset))
    request = dict(dateStart=date, offset=offset)
    response = Scraper.fetch_info(request)
    if len(response) == 0:
        print('Page empty')
        n_awards = -1
        break
    Scraper.scan_page(response)
    n_awards = Scraper.n_awards_in_page
    offset += n_awards_per_page

#url = 'http://api.nsf.gov/services/v1/awards.json?'
#params = urllib.parse.urlencode({'id': 1656688})
#response = urllib.request.urlopen('{}{}'.format(url, params) )
#data_r = json.loads(response.read().decode(response.info().get_param('charset') or 'utf-8'))
#print('Example of existing award: \n {} \n \n'.format(data_r))


#if flag_save == True:
#        n_award += 1
#    #time.sleep(sleep_time)
#    duration = round(time.time() - start, 0)
#    if duration%100  == 0:
#        print('Which one: ', idx)
    
#print('Total Number of award saved: {}'.format(n_award))


Page number: 9528
Number of awards: 25
Page number: 9553
Number of awards: 25
Page number: 9578
Number of awards: 25
Page number: 9603
Number of awards: 25
Page number: 9628
Number of awards: 25
Page number: 9653
Number of awards: 25
Page number: 9678
Number of awards: 25
Page number: 9703
Number of awards: 25
Page number: 9728
Number of awards: 25
Page number: 9753
Number of awards: 25
Page number: 9778
Number of awards: 25
Page number: 9803
Number of awards: 25
Page number: 9828
Number of awards: 25
Page number: 9853
Number of awards: 25
Page number: 9878
Number of awards: 25
Page number: 9903
Number of awards: 25
Page number: 9928
Number of awards: 25
Page number: 9953
Number of awards: 25
Page number: 9978
Number of awards: 25
Page number: 10003
Number of awards: 25
Page number: 10028
Number of awards: 25
Page number: 10053
Number of awards: 25
Page number: 10078
Number of awards: 25
Page number: 10103
Number of awards: 25
Page number: 10128
Number of awards: 25
Page number: 10153
