In [2]:
# Dependencies
import requests
import json
import pandas as pd

In [25]:
import sys
path_to_config = "../../locksmith"
sys.path.insert(0, path_to_config)
import config

In [26]:
# Google developer API key
from config import gkey

**Now using systematic random grid of points arrayed across the Georgia at intervals of 25000 m**

* To prevent spillover into other states I eroded the state boundary by 8000 m
* Points were spaced at an interval of 25000 m

* My goal was to maximize coverage, but reduce spillover into other states

In [27]:
# Read in the grid:
ga_grid = pd.read_csv("../ga_shapefile/ga_grid_df.csv", index_col = 0)

In [28]:
ga_grid.reset_index(drop = True, inplace = True)
ga_grid.head()

Unnamed: 0,latitude,longitude
0,30.592827,-82.111129
1,30.776857,-84.463661
2,30.78357,-84.202829
3,30.789758,-83.941913
4,30.795423,-83.680919


In [11]:
ga_grid_count = len(ga_grid)
len(ga_grid)

227

In [14]:
# If you want a test grid:

ga_grid_test = ga_grid[0:10]

In [12]:
# Make the DataFrame

agencies_df3 = pd.DataFrame({"agency_name":[],
                            "agency_status":[],
                            "agency_address":[],
                            "agency_latitude":[],
                            "agency_longitude":[]})

display(agencies_df3)

Unnamed: 0,agency_name,agency_status,agency_address,agency_latitude,agency_longitude


In [16]:
# This code will pull insurance data at each coordinate, including all next page tokens (recursively).


base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"


def nextpage(your_results, your_response, your_df, updated_local_count):
        
    for j in range(len(your_response["results"]) - 1):
        
        try:
            your_df.loc[local_count, "agency_name"] = your_results[j]["name"]
            your_df.loc[local_count, "agency_status"] = your_results[j]["business_status"]
            your_df.loc[local_count, "agency_address"] = your_results[j]["vicinity"]
            your_df.loc[local_count, "agency_latitude"] = your_results[j]["geometry"]["location"]["lat"]
            your_df.loc[local_count, "agency_longitude"] = your_results[j]["geometry"]["location"]["lng"]
        
        except (KeyError, IndexError):
            print("Missing field/result... skipping.")
        
        updated_local_count += 1
        print(updated_local_count)
        print("------------")
 
    return {"df":your_df, "lcu":updated_local_count}


local_count = 0

# use iterrows to iterate through pandas dataframe
for index, row in ga_grid_test.iterrows():

    params = {
    "radius": 50000,
    "type": "insurance_agency",
    "key": gkey,
    }
    
    lat = row['latitude']
    lon = row['longitude']

    # add location to params dict
    params['location'] = f"{lat}, {lon}"

    # assemble url and make API request
    print(f"Retrieving Results for Index {index}.")
    response = requests.get(base_url, params = params).json()
    
    # extract results
    results = response['results']
    
    for i in range(len(response["results"]) - 1):
        
        try:
            agencies_df3.loc[local_count, "agency_name"] = results[i]["name"]
            agencies_df3.loc[local_count, "agency_status"] = results[i]["business_status"]
            agencies_df3.loc[local_count, "agency_address"] = results[i]["vicinity"]
            agencies_df3.loc[local_count, "agency_latitude"] = results[i]["geometry"]["location"]["lat"]
            agencies_df3.loc[local_count, "agency_longitude"] = results[i]["geometry"]["location"]["lng"]
        
        except (KeyError, IndexError):
            print("Missing field/result... skipping.")
        
        local_count += 1
        print(local_count)
        print("------------")
    
    
    while 'next_page_token' in response.keys():
        time.sleep(5)
        print(response.keys())
        npt = response["next_page_token"]
        params = {
               "pagetoken": npt,
               "key": gkey
                }
        response = requests.get(base_url, params = params).json()
        print(response)
        results = response["results"]
        output_dict = nextpage(results, response, agencies_df3, local_count)
        agencies_df3 = output_dict["df"]
        local_count = output_dict['lcu']
        print(local_count)

Retrieving Results for Index 0.
Retrieving Results for Index 1.
Retrieving Results for Index 2.
Retrieving Results for Index 3.
Retrieving Results for Index 4.
Retrieving Results for Index 5.
Retrieving Results for Index 6.
Retrieving Results for Index 7.
Retrieving Results for Index 8.
Retrieving Results for Index 9.


In [17]:
agencies_df3.head()
display(agencies_df3)

Unnamed: 0,agency_name,agency_status,agency_address,agency_latitude,agency_longitude


In [18]:
agencies_df3.to_csv("agencies_df_grid.csv")

NameError: name 'agencies_df3' is not defined

In [20]:
# If you want to read the data back in before additional processing.

agencies_grid = pd.read_csv("agencies_df_grid.csv", index_col = 0)
len(agencies_grid)

4764

In [21]:
# Need to drop the target coordinates before dropping duplicates (otherwise they will not drop)

agencies_grid.drop(["agency_latitude", "agency_longitude"], axis = 1)

Unnamed: 0,agency_name,agency_status,agency_address
0,Acceptance Insurance,OPERATIONAL,"3000 Dunn Avenue Suite 68A, Jacksonville"
1,Phenix Supply Co,OPERATIONAL,"11705 Industry Drive, Jacksonville"
2,Insurance Auto Auctions,OPERATIONAL,"14492 New Kings Road, Jacksonville"
3,Florida State College of Jacksonville: Insuran...,OPERATIONAL,"3939 Roosevelt Boulevard, Jacksonville"
4,Gary Anderson - State Farm Insurance Agent,OPERATIONAL,"450111 State Road 200, Callahan"
...,...,...,...
12849,Kim Farner Insurance Agency,OPERATIONAL,"375 North Main Street, Hiawassee"
12850,Norton Mountain Insurance,OPERATIONAL,"855 Washington Street, Clarkesville"
12851,Health Life Retirement Plans,OPERATIONAL,"47 Burch Cove Road, Hayesville"
12852,John Essigman Wealth Advisors LLC,OPERATIONAL,"141 North Main Street, Cleveland"


In [22]:
agencies_grid.drop_duplicates(inplace = True)

In [23]:
len(agencies_grid)  # Check how many left after dropping

1402

In [24]:
agencies_grid.reset_index(drop = True, inplace = True)  # Reset the index

In [25]:
agencies_grid.to_csv("agencies_df_grid_drop.csv") # Save a copy of the DataFrame to .csv, after dropping.