# Connecting to the OpenCage API
* Creating a CSV with each city's latitude and longitude

In [1]:
#imports 
import json
import requests
import pandas as pd
from opencage.geocoder import OpenCageGeocode

### Key stored in config file to reference

In [2]:
from config import geokey

### CSV file path variables

In [3]:
# Directory that holds the cleaned CSV files.
# (Previously "../datacleanData/", which was missing the "/" between
# "data" and "cleanData" and so did not match the file paths below.)
pathCleanData = "../data/cleanData/"

# Cleaned job-postings file that this notebook reads
cleanData = pathCleanData + "master_clean_data.csv"
# City-level file (city, state, lat, lng) that this notebook writes
cityData = pathCleanData + "master_city_data.csv"

#### Connecting to Opencage API using key

In [4]:
# Build the OpenCage client used to look up city coordinates.
# The API key is read from the local config module; keys are issued at
# https://opencagedata.com
key = geokey

# The client stores the key and sends it with each geocoding request
geocoder = OpenCageGeocode(key=key)

#### Pulling clean data to use

In [5]:
# Load the cleaned data CSV (path defined in `cleanData`) into a DataFrame
cleansed_data_df = pd.read_csv(filepath_or_buffer=cleanData)

# Preview the first three rows
cleansed_data_df.head(n=3)

Unnamed: 0,job posting id,job title,date created,category,company name,city,state,country,lat,lng,latlng
0,1312805843,Physician: Core Faculty Family Medicine Job in...,2019-10-25 12:38:55,Healthcare & Nursing Jobs,CompHealth,Indiana,Pennsylvania,United States,40.621455,-79.152535,"40.621455,-79.152535"
1,1312804908,Physician: Associate Regional Medical Officer ...,2019-10-25 12:38:30,Healthcare & Nursing Jobs,CompHealth,San Jose,California,United States,37.390494,-121.885434,"37.390494,-121.885434"
2,1312806070,Physician: Child and Adolescent Psychiatry Pos...,2019-10-25 12:39:00,Healthcare & Nursing Jobs,CompHealth,Norwich,New York,United States,42.522869,-75.574677,"42.522869,-75.57467700000002"


#### Lowercasing the column names

In [6]:
# Normalise every column label to lowercase so later cells can refer to
# columns without worrying about capitalisation
cleansed_data_df.columns = [str.lower(label) for label in cleansed_data_df.columns]

# Preview the first three rows with the renamed columns
cleansed_data_df.head(n=3)

Unnamed: 0,job posting id,job title,date created,category,company name,city,state,country,lat,lng,latlng
0,1312805843,Physician: Core Faculty Family Medicine Job in...,2019-10-25 12:38:55,Healthcare & Nursing Jobs,CompHealth,Indiana,Pennsylvania,United States,40.621455,-79.152535,"40.621455,-79.152535"
1,1312804908,Physician: Associate Regional Medical Officer ...,2019-10-25 12:38:30,Healthcare & Nursing Jobs,CompHealth,San Jose,California,United States,37.390494,-121.885434,"37.390494,-121.885434"
2,1312806070,Physician: Child and Adolescent Psychiatry Pos...,2019-10-25 12:39:00,Healthcare & Nursing Jobs,CompHealth,Norwich,New York,United States,42.522869,-75.574677,"42.522869,-75.57467700000002"


#### Setting job posting id as the index

In [7]:
# Re-index the postings by their 'job posting id' column; set_index returns
# a new DataFrame, so cleansed_data_df itself is left unchanged
cleansed_data = cleansed_data_df.set_index(keys='job posting id')

# Preview the first three rows
cleansed_data.head(n=3)

Unnamed: 0_level_0,job title,date created,category,company name,city,state,country,lat,lng,latlng
job posting id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1312805843,Physician: Core Faculty Family Medicine Job in...,2019-10-25 12:38:55,Healthcare & Nursing Jobs,CompHealth,Indiana,Pennsylvania,United States,40.621455,-79.152535,"40.621455,-79.152535"
1312804908,Physician: Associate Regional Medical Officer ...,2019-10-25 12:38:30,Healthcare & Nursing Jobs,CompHealth,San Jose,California,United States,37.390494,-121.885434,"37.390494,-121.885434"
1312806070,Physician: Child and Adolescent Psychiatry Pos...,2019-10-25 12:39:00,Healthcare & Nursing Jobs,CompHealth,Norwich,New York,United States,42.522869,-75.574677,"42.522869,-75.57467700000002"


#### Grouping by city and state 

In [8]:
# Count the non-null entries of every column for each (city, state) pair,
# then turn the group keys back into ordinary 'city' and 'state' columns
grouped_city = (
    cleansed_data
    .groupby(['city', 'state'])
    .count()
    .reset_index()
)

# Preview the first five groups
grouped_city.head()

Unnamed: 0,city,state,job title,date created,category,company name,country,lat,lng,latlng
0,Abbeville,Louisiana,1,1,1,1,1,1,1,1
1,Abbeville,South Carolina,2,2,2,2,2,2,2,2
2,Abbott,Texas,1,1,1,1,1,1,1,1
3,Aberdeen,Idaho,1,1,1,1,1,1,1,1
4,Aberdeen,Maryland,36,36,36,36,36,36,36,36


#### Calling only the city and state columns

In [9]:
# Keep only the 'city' and 'state' columns of the grouped data, ordered
# alphabetically by city; these pairs are what gets geocoded later
city_state = grouped_city.loc[:, ['city', 'state']].sort_values('city')
city_state

Unnamed: 0,city,state
0,Abbeville,Louisiana
1,Abbeville,South Carolina
2,Abbott,Texas
3,Aberdeen,Idaho
4,Aberdeen,Maryland
...,...,...
9514,Zion,Pennsylvania
9515,Zionsville,Indiana
9516,Zionsville,Pennsylvania
9517,Zirconia,North Carolina


#### Iterating through city and state to get the lat and long for city

In [11]:
# Geocode every (city, state) pair and attach the coordinates as new columns.

def _first_lat_lng(results):
    """Return ``(lat, lng)`` taken from the first geocoder result.

    Parameters
    ----------
    results : list of dict or None
        Value returned by ``geocoder.geocode(query)``; each entry is read
        as ``entry['geometry']['lat']`` / ``entry['geometry']['lng']``.

    Returns
    -------
    tuple
        ``(lat, lng)`` of ``results[0]``, or ``(nan, nan)`` when ``results``
        is empty or ``None``, so that a single unmatched city does not abort
        the whole run with an IndexError.
    """
    if not results:
        missing = float('nan')
        return missing, missing
    geometry = results[0]['geometry']
    return geometry['lat'], geometry['lng']


list_city_lat = []   # latitude of each row, in city_state row order
list_cityt_lng = []  # longitude of each row, in city_state row order

# Walk the two columns in parallel instead of using iterrows(): only these
# two values are needed per row, and the row order is identical.
for City, State in zip(city_state['city'], city_state['state']):

    # Query string has the form "<city>,<state>"
    query = str(City) + ',' + str(State)

    results = geocoder.geocode(query)
    city_lat, cityt_lng = _first_lat_lng(results)

    list_city_lat.append(city_lat)
    list_cityt_lng.append(cityt_lng)

# Create the new columns from the collected lists (one entry per row)
city_state['lat'] = list_city_lat
city_state['lng'] = list_cityt_lng
city_state

Unnamed: 0,city,state,lat,lng
0,Abbeville,Louisiana,29.974650,-92.134292
1,Abbeville,South Carolina,34.177949,-82.379246
2,Abbott,Texas,31.889892,-97.087018
3,Aberdeen,Idaho,42.944078,-112.838326
4,Aberdeen,Maryland,39.509556,-76.164120
...,...,...,...,...
9514,Zion,Pennsylvania,40.914229,-77.684721
9515,Zionsville,Indiana,39.950724,-86.261697
9516,Zionsville,Pennsylvania,40.486487,-75.520184
9517,Zirconia,North Carolina,35.241783,-82.416230


#### Saving the DataFrame to a CSV file. This data is to be used for scatter plots in the visualization

In [12]:
# Write the city/state/lat/lng table to the path in `cityData`;
# index=False leaves the DataFrame's row index out of the file
city_state.to_csv(path_or_buf=cityData, index=False)

#### Reading the CSV back to check that it was saved

In [15]:
# Read the city CSV that was just written back into a DataFrame and
# display it to confirm the file was stored
city_csv_file = pd.read_csv(filepath_or_buffer=cityData)
city_csv_file

Unnamed: 0,city,state,lat,lng
0,Abbeville,Louisiana,29.974650,-92.134292
1,Abbeville,South Carolina,34.177949,-82.379246
2,Abbott,Texas,31.889892,-97.087018
3,Aberdeen,Idaho,42.944078,-112.838326
4,Aberdeen,Maryland,39.509556,-76.164120
...,...,...,...,...
9514,Zion,Pennsylvania,40.914229,-77.684721
9515,Zionsville,Indiana,39.950724,-86.261697
9516,Zionsville,Pennsylvania,40.486487,-75.520184
9517,Zirconia,North Carolina,35.241783,-82.416230
