In [1]:
# imports
import json
import pandas as pd
import pymongo

from geopy.geocoders import Nominatim

# setup mongodb connection
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# select database
db = client.project2

# object to retrieve latitude and longitude
geolocator = Nominatim(country_bias='US', user_agent="job_coordinates")

# test variables
# search_job = ['data scientist', 'data engineer']
# search_city = ['san diego', 'san francisco']

# production variables
search_job = ['data scientist', 'data engineer', 'data analyst', 'statistician', 'data & analytics manager']
search_city = ['ann arbor', 'atlanta', 'austin', 'boston', 'charlotte', 'chicago', 'cincinnati', 'columbia', 'dallas', 'denver', 'houston', 'jacksonville', 'los angeles', 'louisville', 'miami', 'minneapolis', 'nashville', 'new york', 'newport news', 'phoenix', 'san diego', 'san francisco', 'seattle', 'virginia beach']

In [2]:
# build geojson base
with open('job_income.json') as json_data:
    dt = json.load(json_data)

feature_list = []

for city in search_city:
    get_coords = geolocator.geocode(city)
    lat = get_coords.latitude
    lon = get_coords.longitude
    feature_list.append({'type': 'Feature',
                         'geometry': {
                             'type': 'Point',
                             'coordinates': [lon, lat]
                         },
                         'properties': {
                             'title': city,
                         }
                        })

In [3]:
# append title, job count, and salary to geojson
with open('job_counts.json') as json_data:
    data_jobs = json.load(json_data)

for f in feature_list:
    city = (f['properties']['title'])
   
    for key in data_jobs:
        value = data_jobs[key]
        f['properties'][key] = {'number_jobs': value[city],
                               'avg_salary': dt[city][key]
                               }

In [4]:
# complete geojson
geojson = {
  'type': 'FeatureCollection',
  'features': feature_list
}

In [5]:
# load geojson to mongodb
db.geojson.update_one({}, {'$set': geojson}, upsert=True)

<pymongo.results.UpdateResult at 0x297ab008bc8>

In [6]:
# save to json
with open('city_jobs.geojson', 'w') as fp:
    json.dump(geojson, fp)