# API Call
- Input file ('Resources/use_this_fortune_500.csv')
- Output file ('Resources/fortune_coordinates_new.csv')

In [56]:
import time
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np
import re

In [57]:
# Initialize Nominatim API
geolocator = Nominatim(user_agent="MyApp")


In [58]:
# Read in data
df = pd.read_csv('Resources/use_this_fortune_500.csv')
final_df = df.loc[df["year"]>= 2015]
final_df.reset_index(drop=True,inplace=True)
final_df.head(5)

Unnamed: 0,name,rank,year,industry,sector,headquarters_state,headquarters_city,market_value_mil,revenue_mil,profit_mil,asset_mil,employees,founder_is_ceo,female_ceo,newcomer_to_fortune_500,global_500
0,Walmart,1,2015,General Merchandisers,Retailing,AR,Bentonville,265344.0,485651.0,16363.0,203706.0,2200000.0,no,no,no,
1,Exxon Mobil,2,2015,Petroleum Refining,Energy,TX,Irving,356549.0,382597.0,32520.0,349493.0,83700.0,no,no,no,
2,Chevron,3,2015,Petroleum Refining,Energy,CA,San Ramon,197381.0,203784.0,19241.0,266026.0,64700.0,no,no,no,
3,Berkshire Hathaway,4,2015,Insurance: Property and Casualty (Stock),Financials,NE,Omaha,357344.0,194673.0,19872.0,526186.0,316000.0,no,no,no,
4,Apple,5,2015,"Computers, Office Equipment",Technology,CA,Cupertino,724773.0,182795.0,39510.0,231839.0,97200.0,no,no,no,


In [59]:
# Generate list of cities that need coordinates
city_list = final_df['headquarters_city']

# Remove empty values
city_list = city_list.dropna()
city_list

0        Bentonville
1             Irving
2          San Ramon
3              Omaha
4          Cupertino
            ...     
4495        New York
4496    Redwood City
4497      Hartsville
4498     Santa Clara
4499      Menlo Park
Name: headquarters_city, Length: 4500, dtype: object

In [60]:
# Generate list of cities that need coordinates
state_list = final_df['headquarters_state']

# Remove empty values
state_list = state_list.dropna()
state_list

0       AR
1       TX
2       CA
3       NE
4       CA
        ..
4495    NY
4496    CA
4497    SC
4498    CA
4499    CA
Name: headquarters_state, Length: 4500, dtype: object

In [61]:
# Zip city and state together for API request
city_state_zip = zip(city_list, state_list)
city_state = list(city_state_zip)
print(city_state)

[('Bentonville', 'AR'), ('Irving', 'TX'), ('San Ramon', 'CA'), ('Omaha', 'NE'), ('Cupertino', 'CA'), ('Detroit', 'MI'), ('Houston', 'TX'), ('Fairfield', 'CT'), ('Dearborn', 'MI'), ('Woonsocket', 'RI'), ('San Francisco', 'CA'), ('Dallas', 'TX'), ('San Antonio', 'TX'), ('Minnetonka', 'MN'), ('New York', 'NY'), ('Chesterbrook', 'PA'), ('Washington', 'DC'), ('Issaquah', 'WA'), ('Palo Alto', 'CA'), ('Cincinnati', 'OH'), ('New York', 'NY'), ('St. Louis', 'MO'), ('Charlotte', 'NC'), ('Armonk', 'NY'), ('Findlay', 'OH'), ('Dublin', 'OH'), ('Chicago', 'IL'), ('New York', 'NY'), ('Seattle', 'WA'), ('San Francisco', 'CA'), ('Redmond', 'WA'), ('Cincinnati', 'OH'), ('Atlanta', 'GA'), ('Chicago', 'IL'), ('Deerfield', 'IL'), ('Minneapolis', 'MN'), ('New Brunswick', 'NJ'), ('Indianapolis', 'IN'), ('New York', 'NY'), ('Mountain View', 'CA'), ('Bloomington', 'IL'), ('McLean', 'VA'), ('Philadelphia', 'PA'), ('Purchase', 'NY'), ('Hartford', 'CT'), ('New York', 'NY'), ('Atlanta', 'GA'), ('Midland', 'MI'), (

In [62]:
# Empty lists for holding data
lat = []
lng = []
city = []
skip_list = []

In [63]:
location = geolocator.geocode(city_state[0])
location.latitude

36.3728538

In [64]:
# Record coordinates for each city
# This cell takes a LONG TIME to run

for i in city_state:
    if i in city or i == None or i in skip_list:
        pass
    else:
        try:
            location = geolocator.geocode(i)
            lat.append(location.latitude)
            lng.append(location.longitude)
            city.append(i)
            print(f'Adding coordinates for {i} to list')
            time.sleep(1)
        except:
            print(f'Skpping {i}')
            skip_list.append(i)
            time.sleep(1)

Adding coordinates for ('Bentonville', 'AR') to list
Adding coordinates for ('Irving', 'TX') to list
Adding coordinates for ('San Ramon', 'CA') to list
Adding coordinates for ('Omaha', 'NE') to list
Adding coordinates for ('Cupertino', 'CA') to list
Adding coordinates for ('Detroit', 'MI') to list
Adding coordinates for ('Houston', 'TX') to list
Adding coordinates for ('Fairfield', 'CT') to list
Adding coordinates for ('Dearborn', 'MI') to list
Adding coordinates for ('Woonsocket', 'RI') to list
Adding coordinates for ('San Francisco', 'CA') to list
Adding coordinates for ('Dallas', 'TX') to list
Adding coordinates for ('San Antonio', 'TX') to list
Adding coordinates for ('Minnetonka', 'MN') to list
Adding coordinates for ('New York', 'NY') to list
Adding coordinates for ('Chesterbrook', 'PA') to list
Adding coordinates for ('Washington', 'DC') to list
Adding coordinates for ('Issaquah', 'WA') to list
Adding coordinates for ('Palo Alto', 'CA') to list
Adding coordinates for ('Cincinnat

In [65]:
# Create dataframe for merging
city_df = pd.DataFrame({
    'city_state': city,
    'latitude': lat,
    'longitude': lng
})

In [66]:
# Split city and state to separate columns
city_merge = []
state_merge = []

for i in range(len(city_df)):
    city_merge.append(city_df['city_state'][i][0])
    state_merge.append(city_df['city_state'][i][1])

In [67]:
# Create dataframe for merging
merge_city_df = pd.DataFrame({
    'headquarters_city': city_merge,
    'headquarters_state': state_merge,
    'latitude': lat,
    'longitude': lng
})

In [70]:
# Merge coordinates to the col_rename_df_updated
merged_col_rename_df_updated = final_df.merge(merge_city_df,on=['headquarters_city','headquarters_state'], how='left')
merged_col_rename_df_updated

Unnamed: 0,name,rank,year,industry,sector,headquarters_state,headquarters_city,market_value_mil,revenue_mil,profit_mil,asset_mil,employees,founder_is_ceo,female_ceo,newcomer_to_fortune_500,global_500,latitude,longitude
0,Walmart,1,2015,General Merchandisers,Retailing,AR,Bentonville,265344.0,485651.0,16363.0,203706.0,2200000.0,no,no,no,,36.372854,-94.208817
1,Exxon Mobil,2,2015,Petroleum Refining,Energy,TX,Irving,356549.0,382597.0,32520.0,349493.0,83700.0,no,no,no,,32.829518,-96.944218
2,Chevron,3,2015,Petroleum Refining,Energy,CA,San Ramon,197381.0,203784.0,19241.0,266026.0,64700.0,no,no,no,,37.764802,-121.954439
3,Berkshire Hathaway,4,2015,Insurance: Property and Casualty (Stock),Financials,NE,Omaha,357344.0,194673.0,19872.0,526186.0,316000.0,no,no,no,,41.258746,-95.938376
4,Apple,5,2015,"Computers, Office Equipment",Technology,CA,Cupertino,724773.0,182795.0,39510.0,231839.0,97200.0,no,no,no,,37.322893,-122.032290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4495,KKR,496,2023,Securities,Financials,NY,New York,45225.0,7273.0,841.0,277077.0,4150.0,no,no,no,no,40.712728,-74.006015
4496,Equinix,497,2023,Real Estate,Financials,CA,Redwood City,66873.0,7263.0,704.0,30311.0,12097.0,no,no,no,no,37.486324,-122.232523
4497,Sonoco Products,498,2023,"Packaging, Containers",Materials,SC,Hartsville,5975.0,7251.0,466.0,7053.0,22000.0,no,no,yes,no,34.374043,-80.073401
4498,ServiceNow,499,2023,Computer Software,Technology,CA,Santa Clara,94338.0,7245.0,325.0,13299.0,20433.0,no,no,yes,no,37.233325,-121.684635


In [76]:
# Generate list of states that need coordinates
state_list = []


for i in range(len(merged_col_rename_df_updated)):
    if merged_col_rename_df_updated['latitude'][i] > 0 or merged_col_rename_df_updated['latitude'][i] < 0:
        pass
    else:
        item = merged_col_rename_df_updated['headquarters_state'][i]
        state_list.append(item)


['CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'CO',
 'CO',
 'MA',
 'CO',
 'MA',
 'MA',
 'CO',
 'CO',
 'CO',
 'MA',
 'MA',
 'CO',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'CO',
 'MA',
 'CO',
 'CO',
 'CO',
 'CO',
 'MA',
 'CO',
 'MA',
 'CO',
 'CO',
 'MA',
 'MA',
 'CO',
 'MA',
 'CO',
 'CO',
 'CO']

In [77]:
# Remove duplicates from the list
state_list = list(set(state_list))
state_list

['MA', 'CO']

In [78]:
# Specify the state name for errors so API isn't confused
for i in range(len(state_list)):
    if state_list[i] == 'MA':
        state_list[i] = 'Massachusetts'
    elif state_list[i] == 'CO':
        state_list[i] = 'Colorado'

state_list

['Massachusetts', 'Colorado']

In [79]:
# Generate empty lists to hold data
lat = []
lng = []
state = []

In [80]:
# Record coordinates for each state, when city is unavailable
for i in state_list:
    if i in state or i == None:
        pass
    else:
        try:
            location = geolocator.geocode(i)
            lat.append(location.latitude)
            lng.append(location.longitude)
            state.append(i)
            print(f'Adding coordinates for {i} to list')
            time.sleep(1)
        except:
            print(f'Skpping {i}')

Adding coordinates for Massachusetts to list
Adding coordinates for Colorado to list


In [81]:
# Generate dataframe of state coordinates
state_df = pd.DataFrame({
    'headquarters_state': state,
    'latitude': lat,
    'longitude': lng
})
state_df

Unnamed: 0,headquarters_state,latitude,longitude
0,Massachusetts,42.378877,-72.032366
1,Colorado,38.725178,-105.607716


In [82]:
# Revert state names back to original
for i in range(len(state_df)):
    if state_df['headquarters_state'][i] == 'Massachusetts':
        state_df['headquarters_state'][i] = 'MA'
    elif state_df['headquarters_state'][i] == 'Colorado':
        state_df['headquarters_state'][i] = 'CO'

state_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  state_df['headquarters_state'][i] = 'MA'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  state_df['headquarters_state'][i] = 'CO'


Unnamed: 0,headquarters_state,latitude,longitude
0,MA,42.378877,-72.032366
1,CO,38.725178,-105.607716


In [83]:
# Add coordinates to the merged dataframe
try:
    for i in range(len(merged_col_rename_df_updated)):
        if merged_col_rename_df_updated['latitude'][i] > 0 or merged_col_rename_df_updated['latitude'][i] < 0:
            pass
        else:
            merged_col_rename_df_updated['latitude'][i] = state_df['latitude'].loc[state_df['headquarters_state'] == merged_col_rename_df_updated['headquarters_state'][i]]
            merged_col_rename_df_updated['longitude'][i] = state_df['longitude'].loc[state_df['headquarters_state'] == merged_col_rename_df_updated['headquarters_state'][i]]
except:
    pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_col_rename_df_updated['latitude'][i] = state_df['latitude'].loc[state_df['headquarters_state'] == merged_col_rename_df_updated['headquarters_state'][i]]
  merged_col_rename_df_updated['latitude'][i] = state_df['latitude'].loc[state_df['headquarters_state'] == merged_col_rename_df_updated['headquarters_state'][i]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_col_rename_df_updated['longitude'][i] = state_df['longitude'].loc[state_df['headquarters_state'] == merged_col_rename_df_updated['headquarters_state'][i]]
  merged_col_rename_df_updated['longitude'][i] = state_df['longitude'].loc[state_df['headquarters_st

In [84]:
# Export
export = merged_col_rename_df_updated[['name','headquarters_city','headquarters_state','latitude','longitude']]

In [85]:
# Export data
export.to_csv('Resources/fortune_coordinates_new.csv', index=False)