In [1]:
import csv
import json
import os
import urllib.parse
from datetime import datetime

import pandas as pd
import requests
import zipcodes
from pandas import json_normalize

In [2]:
#Vars and Stuff--------------------------------------------------------------------------------------------------------------------------------------
# Bullhorn OAuth and REST base URLs used by the login helpers below.
AUTH_URL = "https://auth.bullhornstaffing.com/oauth"
REST_URL = "https://rest.bullhornstaffing.com/rest-services"

# API credentials. Each value is read from an environment variable so real
# secrets never have to be saved inside the notebook; the "NA" placeholders
# remain as defaults, so `auth` always carries the same four keys.
auth = dict(
    client=os.environ.get("BULLHORN_CLIENT_ID", "NA"),
    secret=os.environ.get("BULLHORN_CLIENT_SECRET", "NA"),
    username=os.environ.get("BULLHORN_USERNAME", "NA"),
    password=os.environ.get("BULLHORN_PASSWORD", "NA"))

#BHAuth Stuff----------------------------------------------------------------------------------------------------------------------------------------

def get_auth_code():
    """Request an OAuth authorization code with the credentials in ``auth``.

    Sends a GET to ``AUTH_URL + "/authorize"`` and reads the ``code`` query
    parameter from the URL of the response.

    Returns:
        str: the authorization code, or ``""`` when the response URL carries
        no ``code`` parameter.
    """
    login_query = {
        "client_id": auth['client'],
        "response_type": "code",
        "username": auth['username'],
        "password": auth['password'],
        "action": "Login",
    }
    response = requests.get(AUTH_URL + "/authorize", params=login_query)

    # The code is delivered in the query string of the response URL.
    returned_query = requests.utils.urlparse(response.url).query
    codes = urllib.parse.parse_qs(returned_query).get("code")
    return codes[0] if codes else ""


def get_access_token(auth_code):
    """Exchange an authorization code for the OAuth token payload.

    Args:
        auth_code: authorization code, as returned by ``get_auth_code``.

    Returns:
        The decoded JSON body of the token response, or ``{}`` when the body
        cannot be decoded (the error is printed, not raised).
    """
    token_query = {
        "client_id": auth['client'],
        "client_secret": auth['secret'],
        "code": auth_code,
        "grant_type": "authorization_code",
    }
    # The grant parameters are sent in the query string of the POST.
    response = requests.post(AUTH_URL + "/token", params=token_query)
    try:
        return response.json()
    except Exception as err:
        print(err)
        return {}
#BHRest Stuff----------------------------------------------------------------------------------------------------------------------------------------

# Value sent as the `version` query parameter by rest_login().
# NOTE(review): "*" presumably selects the latest REST API version — confirm against the Bullhorn docs.
version = "*"

import pprint
def rest_login(access_token):
    """Log in to the REST API with an OAuth token and return the login payload.

    Args:
        access_token: mapping that holds the OAuth token under the
            ``'access_token'`` key (the payload from ``get_access_token``).

    Returns:
        The decoded JSON body of the login response, or ``{}`` when the body
        cannot be decoded (the error is printed, not raised).

    Raises:
        KeyError: if ``access_token`` has no ``'access_token'`` entry.
    """
    url = REST_URL + "/login"
    login_params = dict(
        version=version,
        access_token=access_token['access_token'])
    req = requests.get(url, params=login_params)
    try:
        response_dict = req.json()
    except Exception as err:
        print(err)
        # Fall back to an empty result, mirroring get_access_token. The
        # fallback must be bound to the name that is returned; binding it to
        # any other name leaves the return value undefined on this path.
        response_dict = {}
    return response_dict

#Main------------------------------------------------------------------------------------------------------------------------------------------------
def main():
    """Run the whole login handshake and return the REST login payload.

    Chains ``get_auth_code`` -> ``get_access_token`` -> ``rest_login`` and
    returns whatever ``rest_login`` returns.
    """
    token_payload = get_access_token(get_auth_code())
    return rest_login(token_payload)

In [5]:
# CSV whose rows are cleaned below and later used to update Candidate records.
# Set the CANDIDATES_CSV environment variable to load it from another location;
# the default is the path this notebook was originally written against.
CANDIDATES_CSV = os.environ.get(
    'CANDIDATES_CSV',
    'C:/Users/other/Documents/Capstone/Data/all_cns_8_1_23.csv')
df = pd.read_csv(CANDIDATES_CSV)
 

In [6]:
# Data cleaning: convert the epoch-millisecond `dateAdded` values to plain
# dates, drop the score / street-address / timezone columns, and replace empty
# strings with NaN.
df = (
    df
    .assign(dateAdded=lambda frame: pd.to_datetime(frame['dateAdded'], unit='ms').dt.date)
    .drop(columns=['_score', 'address.address1', 'address.address2', 'address.timezone'])
    .pipe(lambda frame: frame.mask(frame == ''))
)

In [7]:
# State abbreviation to name dictionary (found online - JeffPaine/us_state_abbreviations.py)
# Two-letter code -> full name for the 50 states plus DC, kept in alphabetical
# order of the code and grouped by first letter.
statesAbbrev = {
    "AK": "Alaska", "AL": "Alabama", "AR": "Arkansas", "AZ": "Arizona",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut",
    "DC": "District of Columbia", "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa", "ID": "Idaho", "IL": "Illinois", "IN": "Indiana",
    "KS": "Kansas", "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts", "MD": "Maryland", "ME": "Maine", "MI": "Michigan",
    "MN": "Minnesota", "MO": "Missouri", "MS": "Mississippi", "MT": "Montana",
    "NC": "North Carolina", "ND": "North Dakota", "NE": "Nebraska",
    "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico",
    "NV": "Nevada", "NY": "New York",
    "OH": "Ohio", "OK": "Oklahoma", "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina", "SD": "South Dakota",
    "TN": "Tennessee", "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia", "VT": "Vermont",
    "WA": "Washington", "WI": "Wisconsin", "WV": "West Virginia", "WY": "Wyoming",
}

In [8]:
# Inverse lookup: full state name -> two-letter abbreviation.
statesName = dict(zip(statesAbbrev.values(), statesAbbrev.keys()))

In [None]:
def getZip(x):
    """Look up a zip code for the row at position ``x`` of the global ``noZip``.

    The row's ``address.state`` is treated as a full state name and converted
    to its abbreviation through ``statesName`` before querying ``zipcodes``.

    Returns:
        The ``zip_code`` of the first match for the row's city and state, or
        ``None`` when the state is not in ``statesName`` or nothing matches.
    """
    row_label = noZip.index[x]
    city = noZip.at[row_label, 'address.city']
    state = noZip.at[row_label, 'address.state']

    if state not in statesName:
        return None

    matches = zipcodes.filter_by(city=city, state=statesName[state])
    return matches[0]['zip_code'] if len(matches) > 0 else None

# Rows that have a city and a state but no zip code.
noZip = df[
    df['address.zip'].isnull()
    & df['address.state'].notnull()
    & df['address.city'].notnull()
].reset_index()

# getZip reads the module-level noZip by position, so the frame has to be
# assigned and re-indexed before the lookup runs.
noZip['zipNew'] = noZip.index.map(getZip)

# Keep only the rows for which a zip code was found.
noZip = noZip[noZip['zipNew'].notnull()].reset_index()
noZip

In [9]:
def getStateName(x):
    """Return the full state name for abbreviation ``x``, or ``None`` if ``x`` is not in ``statesAbbrev``."""
    return statesAbbrev.get(x)

In [None]:
def getZipOverlap(x):
    """Look up a zip code for the row at position ``x`` of the global ``overlap``.

    The row's ``stateName`` (a full state name) is converted to its
    abbreviation through ``statesName`` before querying ``zipcodes``.

    Returns:
        The ``zip_code`` of the first match for the row's city and state, or
        ``None`` when the state name is not in ``statesName`` or nothing
        matches.
    """
    row_label = overlap.index[x]
    city = overlap.at[row_label, 'address.city']
    state_name = overlap.at[row_label, 'stateName']

    if state_name not in statesName:
        return None

    matches = zipcodes.filter_by(city=city, state=statesName[state_name])
    return matches[0]['zip_code'] if len(matches) > 0 else None
    

# Rows whose state is a two-character value. `.copy()` detaches the selection
# from `df`, so adding a column below writes to an independent frame rather
# than to a slice (which is what triggers pandas' SettingWithCopyWarning).
overlap = df.loc[df['address.state'].str.len() == 2].copy()
overlap['stateName'] = overlap['address.state'].map(getStateName)

# Keep rows with a recognised abbreviation that still have no zip code.
overlap = overlap[overlap['stateName'].notnull() & overlap['address.zip'].isnull()]
overlap = overlap.reset_index()

# getZipOverlap reads the module-level `overlap` by position, so the frame has
# to be assigned and re-indexed before the lookup runs.
overlap['zipNew'] = overlap.index.map(getZipOverlap)

# Keep only the rows for which a zip code was found.
overlap = overlap[overlap['zipNew'].notnull()].reset_index()
overlap

In [None]:
# Ids of the records already covered by `overlap` (state and zip fixed together).
overlapIds = overlap.id.values.tolist()

# Remaining rows whose state is a two-character value. `.copy()` detaches the
# selection from `df`, so adding a column below writes to an independent frame
# rather than to a slice (which triggers pandas' SettingWithCopyWarning).
stateChange = df.loc[df['address.state'].str.len() == 2]
stateChange = stateChange[~stateChange['id'].isin(overlapIds)].copy()
stateChange['stateName'] = stateChange['address.state'].map(getStateName)

# Keep rows with a recognised abbreviation.
stateChange = stateChange[stateChange['stateName'].notnull()]

# Replace every remaining NaN with an empty string.
# NOTE(review): presumably so missing values can be sent in the update payload — confirm.
stateChange = stateChange.fillna('')
stateChange = stateChange.reset_index()
stateChange

In [20]:
# Log in once up front; the update loops below start from this REST base URL
# and session token.
auth_get = main()
auth_url, auth_token = (auth_get[key] for key in ('restUrl', 'BhRestToken'))

In [None]:
# Update zip codes for the noZip records (rows where only the zip code was missing).

# Ids whose update still returned an error body after one re-login and retry.
noZip_failed_ids = []

for _, row in noZip.iterrows():
    payload = {"address": {
        "city": row['address.city'],
        "state": row['address.state'],
        "zip": row['zipNew'],
        "countryID": row['address.countryID'],
    }}

    url = auth_url + "entity/Candidate/" + str(row['id'])
    response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

    if 'errorMessage' in response.text:
        # Any error body is handled as a possibly expired session:
        # log in again and retry this record once.
        auth_get = main()
        auth_url = auth_get['restUrl']
        auth_token = auth_get['BhRestToken']

        # Rebuild the URL, since it was composed from the previous restUrl.
        url = auth_url + "entity/Candidate/" + str(row['id'])
        response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

        # Record the id instead of silently dropping an update that failed twice.
        if 'errorMessage' in response.text:
            noZip_failed_ids.append(row['id'])

if noZip_failed_ids:
    print(f"{len(noZip_failed_ids)} noZip update(s) still failed after retry: {noZip_failed_ids}")

In [None]:
# Update zip codes and state names for the overlap records.

# Ids whose update still returned an error body after one re-login and retry.
overlap_failed_ids = []

for _, row in overlap.iterrows():
    payload = {"address": {
        "city": row['address.city'],
        "state": row['stateName'],
        "zip": row['zipNew'],
        "countryID": row['address.countryID'],
    }}

    url = auth_url + "entity/Candidate/" + str(row['id'])
    response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

    if 'errorMessage' in response.text:
        # Any error body is handled as a possibly expired session:
        # log in again and retry this record once.
        auth_get = main()
        auth_url = auth_get['restUrl']
        auth_token = auth_get['BhRestToken']

        # Rebuild the URL, since it was composed from the previous restUrl.
        url = auth_url + "entity/Candidate/" + str(row['id'])
        response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

        # Record the id instead of silently dropping an update that failed twice.
        if 'errorMessage' in response.text:
            overlap_failed_ids.append(row['id'])

if overlap_failed_ids:
    print(f"{len(overlap_failed_ids)} overlap update(s) still failed after retry: {overlap_failed_ids}")

In [None]:
# Replace the state abbreviation with the full state name for the stateChange
# records; city, zip and countryID are sent back as they are in the frame.

# Ids whose update still returned an error body after one re-login and retry.
stateChange_failed_ids = []

for _, row in stateChange.iterrows():
    payload = {"address": {
        "city": row['address.city'],
        "state": row['stateName'],
        "zip": row['address.zip'],
        "countryID": row['address.countryID'],
    }}

    url = auth_url + "entity/Candidate/" + str(row['id'])
    response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

    if 'errorMessage' in response.text:
        # Any error body is handled as a possibly expired session:
        # log in again and retry this record once.
        auth_get = main()
        auth_url = auth_get['restUrl']
        auth_token = auth_get['BhRestToken']

        # Rebuild the URL, since it was composed from the previous restUrl.
        url = auth_url + "entity/Candidate/" + str(row['id'])
        response = requests.post(url, json=payload, headers={"BhRestToken": auth_token})

        # Record the id instead of silently dropping an update that failed twice.
        if 'errorMessage' in response.text:
            stateChange_failed_ids.append(row['id'])

if stateChange_failed_ids:
    print(f"{len(stateChange_failed_ids)} stateChange update(s) still failed after retry: {stateChange_failed_ids}")