API Documentation: https://developer.usajobs.gov/API-Reference/GET-api-Search

## INPUT PARAMETERS:

In [1046]:
# API key for the USAJOBS API.
# Request one at:
# https://developer.usajobs.gov/APIRequest/Index
#
# The key is read from the USAJOBS_API_KEY environment variable so that it
# never has to be saved inside the notebook. The placeholder below is used
# only when that variable is unset or empty.
import os

apiKey = os.environ.get("USAJOBS_API_KEY") or "INSERT_API_KEY_HERE"

In [1047]:
# Search mode: selects which endpoint the request URL is built from.
# "Y" = archived job postings (Archive endpoint, uses startDate / endDate)
# "N" = current job postings (search endpoint, uses resultsPerPage)
# Any value other than "Y" is treated as a current-postings search.

archive = "N"

In [1048]:
# Number of results to request, sent as the ResultsPerPage query parameter.
# Only applied to current-postings searches (archive != "Y").
# NOTE(review): check the API documentation for the maximum page size allowed.

resultsPerPage = 500

In [1049]:
# Posting date range, sent as PostingStartDate / PostingEndDate.
# ONLY USED FOR ARCHIVED JOB POSTINGS (archive == "Y"); ignored otherwise.

# Format: MM/DD/YYYY

startDate = "12/01/2016"
endDate = "12/30/2016"

In [1050]:
# Position title / search terms.
# Sent as the Keyword parameter for current searches and as PositionTitle for
# archive searches. The ";" characters are removed when the output file name
# is built.
# NOTE(review): ";" presumably separates individual search words - confirm
# against the API documentation.

title = "data;science"

## CODE BELOW:

In [1051]:
import datetime
import time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [1052]:
# Base URL for a current-postings search.
# The title is percent-encoded so that spaces or reserved characters such as
# "&" and "#" cannot break or inject into the query string. ";" is kept
# literal because the title uses it between search words.
urlSearch = (
    "https://data.usajobs.gov/api/search?Keyword="
    + quote(title, safe=";")
    + "&KeywordFilter=All"
)

In [1053]:
# Base URL for an archived-postings search.
# The title is percent-encoded so that spaces or reserved characters such as
# "&" and "#" cannot break or inject into the query string. ";" is kept
# literal because the title uses it between search words.
urlArchive = (
    "https://data.usajobs.gov/api/Archive?PositionTitle="
    + quote(title, safe=";")
)

In [1054]:
# Choose the endpoint for the requested mode and append the parameters that
# only apply to that mode: a posting date window for archive searches, a page
# size for current searches.
if archive == "Y":
    url = urlArchive + "&PostingStartDate=" + startDate + "&PostingEndDate=" + endDate
else:
    url = urlSearch + "&ResultsPerPage=" + str(resultsPerPage)

In [1055]:
# Show the exact URL that is about to be requested (label and URL on separate lines).
print("API GET URL CALL:", url, sep="\n")

API GET URL CALL:
https://data.usajobs.gov/api/search?Keyword=data;engineer&KeywordFilter=All&ResultsPerPage=500


In [1056]:
# Request headers: the API key is passed in the "Authorization-Key" header.
headers = {"Authorization-Key": apiKey}

In [1057]:
# Send the GET request.
# TLS certificate verification is left at the requests default (enabled) so the
# API key is only ever sent to an authenticated host, and a timeout keeps the
# cell from hanging forever if the server stops responding.
res = requests.get(url, headers=headers, timeout=30)



In [1058]:
# Report the HTTP status code of the response.
print("API Response code: ", res.status_code)

API Response code:  200


In [1059]:
# Decode the JSON body of the response.
# NOTE(review): convertToDataframe() calls res.json() again on its own, so this
# module-level copy is not used by the cells visible below.
response = res.json()

In [1060]:
def _firstField(descriptor, listKey, field):
    """Return ``field`` from the first entry of the list stored under ``listKey``.

    Gives None instead of raising when the list is missing, None or empty, or
    when its first entry has no ``field``.
    """
    entries = descriptor.get(listKey) or []
    return entries[0].get(field) if entries else None


def parseResults(searchResults):
    """Flatten search result items into a DataFrame with one row per posting.

    Parameters
    ----------
    searchResults : iterable of dict, or None
        Result items from the API response. Items that have no
        ``MatchedObjectDescriptor`` key are skipped; None is treated as empty.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: PositionID, ApplicationCloseDate, JobGrade,
        PositionEndDate, OrganizationName, DepartmentName,
        QualificationSummary, URI, PositionLocation, OfferingType, MinPay,
        MaxPay, PayType, PositionStartDate, PositionTitle, JobSummary.

        * JobGrade, OfferingType, MinPay, MaxPay and PayType come from the
          first entry of their list-valued source field.
        * PositionLocation is the ``CityName`` of every location joined with
          ``|`` (an empty string when there are no locations).
        * Any field absent from a descriptor is None rather than an error.
    """
    # Explicit column list keeps the column order stable and gives an empty
    # frame the same columns as a populated one.
    columns = [
        "PositionID",
        "ApplicationCloseDate",
        "JobGrade",
        "PositionEndDate",
        "OrganizationName",
        "DepartmentName",
        "QualificationSummary",
        "URI",
        "PositionLocation",
        "OfferingType",
        "MinPay",
        "MaxPay",
        "PayType",
        "PositionStartDate",
        "PositionTitle",
        "JobSummary",
    ]

    records = []
    for item in searchResults or []:
        if "MatchedObjectDescriptor" not in item:
            continue
        descriptor = item.get("MatchedObjectDescriptor") or {}

        # A missing CityName is rendered as the text "None", as before.
        cityNames = "|".join(
            str(location.get("CityName", None))
            for location in descriptor.get("PositionLocation") or []
        )
        details = (descriptor.get("UserArea") or {}).get("Details") or {}

        # NOTE(review): the earlier version also read PositionLocationDisplay
        # but never returned it; it stays out to keep the column set unchanged.
        records.append({
            "PositionID": descriptor.get("PositionID", None),
            "ApplicationCloseDate": descriptor.get("ApplicationCloseDate", None),
            "JobGrade": _firstField(descriptor, "JobGrade", "Code"),
            "PositionEndDate": descriptor.get("PositionEndDate", None),
            "OrganizationName": descriptor.get("OrganizationName", None),
            "DepartmentName": descriptor.get("DepartmentName", None),
            "QualificationSummary": descriptor.get("QualificationSummary", None),
            "URI": descriptor.get("PositionURI", None),
            "PositionLocation": cityNames,
            "OfferingType": _firstField(descriptor, "PositionOfferingType", "Name"),
            "MinPay": _firstField(descriptor, "PositionRemuneration", "MinimumRange"),
            "MaxPay": _firstField(descriptor, "PositionRemuneration", "MaximumRange"),
            "PayType": _firstField(descriptor, "PositionRemuneration", "RateIntervalCode"),
            "PositionStartDate": descriptor.get("PositionStartDate", None),
            "PositionTitle": descriptor.get("PositionTitle", None),
            "JobSummary": details.get("JobSummary", None),
        })

    return pd.DataFrame(records, columns=columns)

In [1061]:
def convertToDataframe(res):
    """Decode an API response and parse its result items into a DataFrame.

    Progress and diagnostics are printed along the way.

    Parameters
    ----------
    res : response object
        Must provide a ``json()`` method returning the decoded body as a dict.

    Returns
    -------
    pandas.DataFrame or None
        The frame produced by ``parseResults`` (index reset), or None when the
        body has no ``SearchResult`` section, reports zero results, or carries
        no result items.

    Notes
    -----
    Reads the module-level ``archive`` flag: archive searches ("Y") report
    their total under ``SearchResultCount``, current searches under
    ``SearchResultCountAll``.
    """
    response = res.json()
    print("==============================================================")

    if 'SearchResult' not in response:
        print("No SearchResult found in json response.")
        return None

    # A null SearchResult section is treated like an empty one.
    searchResult = response.get("SearchResult") or {}

    countKey = "SearchResultCount" if archive == "Y" else "SearchResultCountAll"
    searchResultNumber = searchResult.get(countKey, 0)
    print(searchResultNumber, "results found in API response...")

    if not (searchResultNumber > 0):
        print("Search Result Number = 0.")
        return None

    # The count can be positive while the items list is absent or null;
    # fall back to an empty list so len() cannot fail.
    searchResults = searchResult.get("SearchResultItems") or []
    if len(searchResults) == 0:
        print("No Search Results.")
        return None

    t0 = time.time()
    print("Parsing in progress...")
    df = parseResults(searchResults).reset_index(drop = True)
    t1 = time.time()
    print("Parse complete. \nDuration: ", round(t1-t0, 5), " seconds.")
    print("Number of records: ", len(df))
    return df

In [1062]:
# Parse the HTTP response into a DataFrame of job postings.
# NOTE: convertToDataframe returns None when the response holds no usable
# results, in which case the cells below that call len(df) / df.to_csv fail.
df = convertToDataframe(res)

91 results found in API response...
Parsing in progress...
Parse complete. 
Duration:  0.00863  seconds.
Number of records:  91


In [1063]:
# Build the output file name:
#   <title without ";">_<archive flag>_<row count>_<YYYYMMDDHHMMSS>[_<start>_<end>].csv
now = datetime.datetime.now()

# Zero-padded timestamp components.
nowDate = now.strftime("%Y%m%d")
nowHour = now.strftime("%H")
nowMin = now.strftime("%M")
nowSec = now.strftime("%S")

nowString = "".join([nowDate, nowHour, nowMin, nowSec])

# Archive searches also record the requested posting window, slashes removed.
searchDates = ""
if archive == "Y":
    searchDates = ("_" + str(startDate) + "_" + str(endDate)).replace("/", "")

fileName = "_".join([title.replace(";", ""), archive, str(len(df)), nowString]) + searchDates + ".csv"

print(fileName)

dataengineer_N_91_20171126211753.csv


In [1064]:
# Write the parsed postings to a CSV file at the path held in fileName.
df.to_csv(fileName)

In [1065]:
# Preview the first rows, transposed so each posting is a column and the
# many fields read top to bottom.
df.head().T

Unnamed: 0,0,1,2,3,4
ApplicationCloseDate,2017-12-30,2017-11-27,2017-12-11,2017-12-05,2017-12-05
DepartmentName,Department of Defense,National Aeronautics and Space Administration,National Aeronautics and Space Administration,Department of the Air Force,Department of Commerce
JobGrade,IA,GS,GS,GG,ZP
JobSummary,"JOB DESCRIPTION: Data Engineers develop, const...",The Applied Engineering and Technology Directo...,The Applied Engineering and Technology Directo...,"To achieve that mission, the Air Force has a v...",This position is located in the National Envir...
MaxPay,123234.0,145629.0,145629.0,95884.0000,161900.0
MinPay,60613.0,112021.0,94796.0,73755.0000,131767.0
OfferingType,This is a permanent position.,Permanent,Permanent,Permanent,Permanent
OrganizationName,National Geospatial-Intelligence Agency,Goddard Space Flight Center,Goddard Space Flight Center,Air Force Intelligence Analysis Agency,National Oceanic and Atmospheric Administration
PayType,Per Year,Per Year,Per Year,Per Year,Per Year
PositionEndDate,2017-12-30,2017-11-27,2017-12-11,2017-12-05,2017-12-05
