In [1]:
#widen display
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import matplotlib as plt
import requests
import json
import gmaps
import time
from time import sleep

# API Keys
from config import api_key

# Configure gmaps
gmaps.configure(api_key=api_key)

In [3]:
#Google Places Types: https://developers.google.com/places/web-service/supported_types

#TODO: Can we incorporate this list into the loop so this list can change without changing the code
types = ['bank']#, 'library', 'park', 'liquor_store', 'hospital']

In [4]:
# Read in the Dallas County Appraisal District (DCAD) property values file

file = "Resources/dcad_combined.csv"

#create DataFrame
dcad_df = pd.read_csv(file, usecols=['PROPERTY_ZIPCODE', 'TOT_VAL'])
dcad_df.head(5)

Unnamed: 0,PROPERTY_ZIPCODE,TOT_VAL
0,750513060,103500
1,750513040,145500
2,750502208,168040
3,750502277,200040
4,750617840,151880


In [5]:
#add column for 5 digit zipcode
dcad_df['ZIPCODE'] = dcad_df['PROPERTY_ZIPCODE'].astype(str).str[:5]

In [6]:
#Group Property Values by Zipcode
zip_group = dcad_df.groupby('ZIPCODE')['TOT_VAL']

In [7]:
zip_group_df = dcad_df.groupby('ZIPCODE')['TOT_VAL'].agg(('count', 'mean'))\
                    .rename(columns={'count':'PropertyValueCount', 'mean':'MeanPropertyValue'})\
                    .sort_values(by='MeanPropertyValue', ascending=False)\
                    .reset_index()

In [8]:
#Add columns to DataFrame to store business data
zip_group_df["MeanPropertyDollarValue"] = zip_group_df["MeanPropertyValue"].map("${:,.0f}".format)
zip_group_df['Lat'] = "" 
zip_group_df["Lng"] = ""
zip_group_df["City"] = ""
zip_group_df["State"] = ""
zip_group_df = zip_group_df.rename(columns={"ZIPCODE": "Zipcode"})

In [9]:
# create a params dict that will be updated with new zipcode each iteration
params = {"key": api_key}

# Loop through the zipcode pd's and run a lat/long search for each
for index, row in zip_group_df.iterrows():
    base_geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

    zipcode = row['Zipcode']

    # update address key value to zipcode
    params['address'] = zipcode

    # make request
    zips_lat_lng = requests.get(base_geocode_url, params=params)
    
    # convert to json
    zips_lat_lng = zips_lat_lng.json()
    #print(json.dumps(zips_lat_lng, indent=4, sort_keys=True))
    
    #loop through address to find city
    for i in zips_lat_lng['results'][0]['address_components']:
        if i['types'][0] == 'locality':
            zip_group_df.loc[index, "City"] = i['long_name']
    
    #loop though address to find state
    for i in zips_lat_lng['results'][0]['address_components']:
        if i['types'][0] == 'administrative_area_level_1':
            zip_group_df.loc[index, "State"] = i['short_name']
    zip_group_df.loc[index, "Lat"] = zips_lat_lng["results"][0]["geometry"]["location"]["lat"]
    zip_group_df.loc[index, "Lng"] = zips_lat_lng["results"][0]["geometry"]["location"]["lng"]

# Visualize to confirm lat lng appear
zip_group_df

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State
0,75205,6415,1.606362e+06,"$1,606,362",32.8326,-96.7976,Dallas,TX
1,75225,7633,1.207824e+06,"$1,207,824",32.8695,-96.7896,Dallas,TX
2,75201,1326,9.667580e+05,"$966,758",32.7863,-96.7963,Dallas,TX
3,75209,5424,7.230552e+05,"$723,055",32.8539,-96.819,Dallas,TX
4,75230,9337,7.046378e+05,"$704,638",32.9005,-96.7869,Dallas,TX
5,75252,262,6.376967e+05,"$637,697",33.0009,-96.7976,Dallas,TX
6,75229,9031,6.056789e+05,"$605,679",32.8958,-96.8726,Dallas,TX
7,75220,5735,6.025323e+05,"$602,532",32.8622,-96.8726,Dallas,TX
8,75214,10429,5.574637e+05,"$557,464",32.8243,-96.744,Dallas,TX
9,75039,2333,4.344581e+05,"$434,458",32.8917,-96.9478,Irving,TX


In [10]:
#Get 5 Zipcodes with highest values
zip_group_top_and_bottom = zip_group_df.head(5)

In [11]:
#Get 5 Zipcodes with lowest values
zip_group_top_and_bottom = zip_group_top_and_bottom.append(zip_group_df.tail(5))
zip_group_top_and_bottom

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State
0,75205,6415,1606362.0,"$1,606,362",32.8326,-96.7976,Dallas,TX
1,75225,7633,1207824.0,"$1,207,824",32.8695,-96.7896,Dallas,TX
2,75201,1326,966758.0,"$966,758",32.7863,-96.7963,Dallas,TX
3,75209,5424,723055.2,"$723,055",32.8539,-96.819,Dallas,TX
4,75230,9337,704637.8,"$704,638",32.9005,-96.7869,Dallas,TX
86,75215,4430,53040.15,"$53,040",32.7557,-96.7655,Dallas,TX
87,76065,1,44860.0,"$44,860",32.4805,-96.9639,Midlothian,TX
88,75125,121,42103.22,"$42,103",32.5204,-96.6424,Ferris,TX
89,75247,6,35345.0,"$35,345",32.8167,-96.8834,Dallas,TX
90,75210,1681,33010.14,"$33,010",32.7654,-96.7414,Dallas,TX


In [33]:
def findPlaces(params):
    #set global variable so name is returned
    global next_page_token
    global results_list

    #Google Places Search
    base_places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    response = requests.get(base_places_url, params=params)
    
    search_results = json.loads(response.text)
    results_list = search_results["results"]
    
    #Set next page token. Default is None.
    next_page_token = search_results.get("next_page_token", None)

    return next_page_token, results_list

In [68]:
# params dictionary to update each iteration
for each_type in types:
    
    params = {
        #3 mi radius. A Zipcode is not returned in the results, so we cannot 
        #match against our zipcode without doing a reverse lookup for every result
        "radius": 4828,
        "types": each_type,
        "keyword": "bank",
        "key": api_key,
    }
    
    #variables for the specific column names for the business type we are searching
    count_column = f"{each_type}_count"
    rating_column = f"{each_type}_rating"
    
    #add columns for each type we looking up
    zip_group_df[count_column] = 0
    zip_group_df[rating_column] = 0

   
    # Use the lat/lng we recovered to search for businesses
    for index, row in zip_group_top_and_bottom.iterrows():

        rating_sum = 0
        rating_count = 0
        business_count = 0

        next_page_token = "" #initialize the page token

        # get lat, lng from df
        lat = row["Lat"]
        lng = row["Lng"]

        # change location each iteration while leaving original params in place
        #params["location"] = f"32.8326,-96.7976" #testing 1 lat, lng
        params["location"] = f"{lat},{lng}"

        while next_page_token != None:
            
            params["pagetoken"] = next_page_token
            
            #call places function
            findPlaces(params)
            time.sleep(2) #have to add delay because it can take a moment for the pagetoken is actually available

            #Loop through results list to count each business and get rating for each 
            for each_result in results_list:
                if each_type == 'bank' and not "ATM" in each_result["name"]:
                    business_count+=1
                #check for KeyError since not all business have a rating
                try:
                    #print(f'{each_result["name"]}: {each_result["rating"]}')
                    rating = each_result["rating"]
                    rating_count += 1
                    rating_sum += rating
                except(KeyError):
                    next
            
            #Set Rating to 0 if there are not businesses returned
            try:
                zip_group_df.loc[index, rating_column] = rating_sum / rating_count
            except(ZeroDivisionError):
                zip_group_df.loc[index, rating_column] = 0
                #detail_df = pd.DataFrame.from_dict(results_list, orient='columns')
                
            #Increment the business count
            zip_group_df.loc[index, count_column] = business_count
            
        print(f'Total number of {each_type}s found for {row["Zipcode"]}: {business_count}')

Total number of banks found for 75205: 60
Total number of banks found for 75225: 60
Total number of banks found for 75201: 60
Total number of banks found for 75209: 60
Total number of banks found for 75230: 60
Total number of banks found for 75215: 52
Total number of banks found for 76065: 6
Total number of banks found for 75125: 1
Total number of banks found for 75247: 18
Total number of banks found for 75210: 15


In [74]:
zip_group_top_and_bottom

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State
0,75205,6415,1606362.0,"$1,606,362",32.8326,-96.7976,Dallas,TX
1,75225,7633,1207824.0,"$1,207,824",32.8695,-96.7896,Dallas,TX
2,75201,1326,966758.0,"$966,758",32.7863,-96.7963,Dallas,TX
3,75209,5424,723055.2,"$723,055",32.8539,-96.819,Dallas,TX
4,75230,9337,704637.8,"$704,638",32.9005,-96.7869,Dallas,TX
86,75215,4430,53040.15,"$53,040",32.7557,-96.7655,Dallas,TX
87,76065,1,44860.0,"$44,860",32.4805,-96.9639,Midlothian,TX
88,75125,121,42103.22,"$42,103",32.5204,-96.6424,Ferris,TX
89,75247,6,35345.0,"$35,345",32.8167,-96.8834,Dallas,TX
90,75210,1681,33010.14,"$33,010",32.7654,-96.7414,Dallas,TX


In [91]:
# Store latitude and longitude in locations
top_zip_locations_to_map = list(zip(zip_group_top_and_bottom.head()['Lat'], zip_group_top_and_bottom.head()['Lng']))
bottom_zip_locations_to_map = list(zip(zip_group_top_and_bottom.tail()['Lat'], zip_group_top_and_bottom.tail()['Lng']))

In [93]:
top_zip_locations_to_map

[(32.7556954, -96.76545949999999),
 (32.4804629, -96.9638624),
 (32.5203683, -96.642433),
 (32.8166692, -96.8833562),
 (32.765421, -96.7413706)]

In [94]:
bottom_zip_locations_to_map

[(32.7556954, -96.76545949999999),
 (32.4804629, -96.9638624),
 (32.5203683, -96.642433),
 (32.8166692, -96.8833562),
 (32.765421, -96.7413706)]

In [95]:
# Customize the size of the figure
figure_layout = {
    'width': '600px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'
}

fig = gmaps.figure(layout=figure_layout)

In [96]:
# Assign the symbols layer to a variable
symbols_top = gmaps.symbol_layer(
        top_zip_locations_to_map, fill_color='blue', stroke_color='blue')
symbols_bottom = gmaps.symbol_layer(
        bottom_zip_locations_to_map, fill_color='red', stroke_color='red')

# Add the layer to the map
fig.add_layer(symbols_top)
fig.add_layer(symbols_bottom)
fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…