In [None]:
#imports 
import json
import requests
import pandas as pd
import matplotlib as plt
import pprint as pprint
from pandas.io.json import json_normalize
import gmaps
import gmaps.datasets

In [None]:
# Directory that holds the raw-data CSV files
pathRawData = "../data/rawData/"

# CSV checkpoints used by the cells below, in pipeline order:
#   rawData      - normalized job postings
#   orgRawData   - selected and renamed columns
#   noMisingData - rows left after dropping missing values
#   rawGeoData   - postings enriched with geocoding columns
rawData, orgRawData, noMisingData, rawGeoData = (
    "../data/rawData/jp_normalized_raw_dataframe.csv",
    "../data/rawData/jp_organized_raw_dataframe.csv",
    "../data/rawData/jp_no_missing_raw_data.csv",
    "../data/rawData/jp_geo_raw_dataframe.csv",
)

In [None]:
# Remember to update the config file with your API key
from config import api_key
from config import api_id
from config import gkey

In [None]:
# Adzuna job-search endpoint; the results page number is appended to it
url = "https://api.adzuna.com/v1/api/jobs/us/search/"
# Only the first page is requested here
page = "1"
# Credentials query string built from the values imported from config
api_details ="?" + "app_id=" + api_id + "&app_key=" + api_key
# Build query URL
query_url = url + page + api_details
# Display only the credential-free part of the URL: echoing query_url would
# store the app_id/app_key in the saved notebook output
url + page

In [None]:
# Fetch the page addressed by query_url
data_response = requests.get(query_url)
# Fail loudly on an HTTP error (e.g. rejected credentials) instead of
# carrying an error payload forward as if it were data
data_response.raise_for_status()
# Decode the JSON body and show which Python type it came back as
data_json = data_response.json()
type(data_json)


In [None]:
#Getting data using json_normalize
#LINK: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.io.json.json_normalize.html

# Page numbers to request, kept as strings so they can be appended to the URL
pages_test = [str(x) for x in range(1,101)]

counter = 0
# One normalized DataFrame per page. They are combined once after the loop:
# growing a DataFrame with .append() re-copies all earlier rows on every
# iteration, and DataFrame.append no longer exists in pandas 2.x.
page_frames = []

for page_num in pages_test:
    page_response = requests.get(url + page_num + api_details)
    # Stop on an HTTP error rather than failing later on a missing 'results' key
    page_response.raise_for_status()
    response = page_response.json()

    #create a df to store the normalized page that has the job postings
    normalized_page = json_normalize(response['results'])

    #add normalized data into a df
    data_page_df = pd.DataFrame(normalized_page)

    #add the url column to df incase an error occurs you know what page you were on
    data_page_df['query_url'] = 'page number ' + page_num

    #keep the page so all pages can be combined and saved together
    page_frames.append(data_page_df)

    counter += 1
    print(f"Page {page_num} iteration complete")

if page_frames:
    # sort=True aligns the columns when pages differ; sort_index(axis=1) keeps
    # the columns in alphabetical order even when every page already matched
    data_pages_df = pd.concat(page_frames, sort=True).sort_index(axis=1)
else:
    data_pages_df = pd.DataFrame()

# Report the real number of rows instead of assuming 10 postings per page
print(f'total rows iterated | {len(data_pages_df)}')
      

In [None]:
# Write the combined postings to the rawData CSV, leaving out the row index
data_pages_df.to_csv(path_or_buf=rawData, index=False)

In [None]:
# Read the raw postings back from the rawData CSV and preview three rows
csv_file = pd.read_csv(filepath_or_buffer=rawData)
csv_file.head(n=3)


In [None]:
# Columns kept for the following cells
selected_columns = [
    "id",
    "title",
    "category.label",
    "company.display_name",
    "location.display_name",
    "latitude",
    "longitude",
]

# Copy the loaded frame, then narrow the copy to the selected columns
csv_file_copy = csv_file.copy()[selected_columns]
csv_file_copy.head()

In [None]:
# Mapping from the original column names to display-friendly labels
column_labels = {
    "id": "Job Posting ID",
    "title": "Job Title",
    "category.label": "Category",
    "company.display_name": "Company Name",
    "location.display_name": "Location",
    "latitude": "Lat",
    "longitude": "Lng",
}

# rename returns a new frame; csv_file_copy keeps its original column names
renamed_csv_file = csv_file_copy.rename(column_labels, axis="columns")
renamed_csv_file.head()

In [None]:
# Write the renamed frame to the orgRawData CSV, leaving out the row index
renamed_csv_file.to_csv(path_or_buf=orgRawData, index=False)

In [None]:
# Read the organized data back from the orgRawData CSV
org_raw_csv_file = pd.read_csv(filepath_or_buffer=orgRawData)
# Work on a separate deep copy so the loaded frame stays untouched
org_raw_csv_file_copy = org_raw_csv_file.copy(deep=True)
org_raw_csv_file_copy.head()

In [None]:
# Number of non-missing values in each column
org_raw_csv_file_copy.count(axis=0)

In [None]:
# Drop every row that has a missing value in any column (not only Lat/Lng),
# then show the per-column counts that remain
org_data = org_raw_csv_file_copy.dropna(axis=0, how="any")
org_data.count(axis=0)

In [None]:
# Write the rows without missing values to the noMisingData CSV (no row index)
org_data.to_csv(path_or_buf=noMisingData, index=False)

In [None]:
# Read the no-missing-values data back from the noMisingData CSV
no_missing_data_csv_file = pd.read_csv(filepath_or_buffer=noMisingData)
# Later cells add columns to this deep copy, not to the loaded frame
no_missing_data = no_missing_data_csv_file.copy(deep=True)
no_missing_data.head()

In [None]:
# Text form of each coordinate column
lat_text = no_missing_data['Lat'].map(str)
lng_text = no_missing_data['Lng'].map(str)

# New LatLng column: "<Lat>,<Lng>" for every row
no_missing_data['LatLng'] = lat_text.str.cat(lng_text, sep=",")

# Plain Python list of the combined values, displayed as the cell output
LatLng = no_missing_data['LatLng'].tolist()
LatLng

In [None]:
# Add empty-string placeholder columns that later steps fill with the
# data pulled from the Google API
for new_column in ('geocode_data', 'city', 'state', 'country'):
    no_missing_data[new_column] = ''

#Create a function to call each url with the datas Lat Long
def reverse_geocode(latlng):
    """Look up one coordinate pair with the Google Geocoding API.

    Args:
        latlng: Coordinates as a "<lat>,<lng>" string; it is appended
            unchanged to the request URL.

    Returns:
        The first entry of the response's 'results' list, or an empty dict
        when that list is empty, the payload has no 'results' entry, or the
        decoded payload is not a dict.
    """
    geocode_url = "https://maps.googleapis.com/maps/api/geocode/json?latlng="
    api_geo_details ="&key=" + gkey
    query_geo_url = geocode_url + latlng + api_geo_details
    geo_data_json = requests.get(query_geo_url).json()

    # A payload without a usable 'results' list is treated like "no match"
    # instead of raising, so one odd response does not abort a whole .map() run
    if not isinstance(geo_data_json, dict):
        return {}
    matches = geo_data_json.get('results') or []
    return matches[0] if matches else {}

# Call reverse_geocode once per LatLng value, in row order, and store the
# returned dicts in the geocode_data column
no_missing_data['geocode_data'] = [
    reverse_geocode(coordinate_pair)
    for coordinate_pair in no_missing_data['LatLng']
]
no_missing_data.head()

In [None]:
#Created a function that searches for Country
def parse_country(geocode_data):
    """Return the 'long_name' of the first component typed 'country'.

    Returns None when geocode_data is None, has no 'address_components'
    entry, or none of its components lists the 'country' type.
    """
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    country_names = (
        part['long_name']
        for part in geocode_data['address_components']
        if 'country' in part['types']
    )
    return next(country_names, None)

#Created a function that searches for state
def parse_state(geocode_data):
    """Return the 'long_name' of the first 'administrative_area_level_1' component.

    Returns None when geocode_data is None, has no 'address_components'
    entry, or no component lists that type.
    """
    has_components = (
        geocode_data is not None and 'address_components' in geocode_data
    )
    parts = geocode_data['address_components'] if has_components else []

    for part in parts:
        if 'administrative_area_level_1' not in part['types']:
            continue
        return part['long_name']
    return None

#Created a function that searches for city/town
def parse_city(geocode_data):
    """Return the best available city/town name from a geocoding result.

    Component types are tried in order of preference: 'locality',
    'postal_town', 'administrative_area_level_2', then
    'administrative_area_level_1'. The preference is applied across the whole
    component list, so a broader area that happens to be listed before a
    locality can no longer be returned as the city.

    Args:
        geocode_data: A geocoding result dict with an 'address_components'
            list, or None.

    Returns:
        The 'long_name' of the first component matching the most preferred
        type that is present, or None when geocode_data is None, has no
        'address_components' entry, or no component matches any of the types.
    """
    if geocode_data is None or 'address_components' not in geocode_data:
        return None

    components = geocode_data['address_components']
    preferred_types = (
        'locality',
        'postal_town',
        'administrative_area_level_2',
        'administrative_area_level_1',
    )
    for wanted_type in preferred_types:
        for component in components:
            # .get() lets a component without 'types' be skipped, not raise
            if wanted_type in component.get('types', ()):
                return component['long_name']
    return None

# Fill the city, state and country columns by mapping the matching parser
# over the geocode_data column
for column_name, parser in (
    ('city', parse_city),
    ('state', parse_state),
    ('country', parse_country),
):
    no_missing_data[column_name] = no_missing_data['geocode_data'].map(parser)
no_missing_data.head()

In [None]:
# geo_raw_df is a second name for no_missing_data (same object, not a copy)
geo_raw_df = no_missing_data
# Write the geocoded data to the rawGeoData CSV, leaving out the row index
geo_raw_df.to_csv(path_or_buf=rawGeoData, index=False)
geo_raw_df.head()