In [1]:
#widen display #thanks Nick!
from IPython.core.display import display, HTML, clear_output
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import requests
import json
import gmaps
import time

# API Keys
from config import api_key

# Configure gmaps
gmaps.configure(api_key=api_key)

In [3]:
def findPlaces(params):
    #set global variable so name is returned
    global next_page_token
    global results_list

    #Google Places Search
    base_places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    response = requests.get(base_places_url, params=params)
    
    search_results = json.loads(response.text)
    results_list = search_results["results"]
    
    #Set next page token. Default is None.
    next_page_token = search_results.get("next_page_token", None)
    
    time.sleep(4) #add delay because it can take a moment before the the page token is actually available to make the subsequent call

    return next_page_token, results_list

In [4]:
# Read in the Dallas County Appraisal District (DCAD) property values file
file = "Resources/dcad_combined.csv"

#create DataFrame
dcad_df = pd.read_csv(file, usecols=['PROPERTY_ZIPCODE', 'TOT_VAL','SPTD_CODE'])
dcad_df.head(5)

Unnamed: 0,PROPERTY_ZIPCODE,TOT_VAL,SPTD_CODE
0,750513060,103500,A11
1,750513040,145500,A11
2,750502208,168040,A11
3,750502277,200040,A11
4,750617840,151880,A11


In [5]:
#add column for 5 digit zipcode
dcad_df['ZIPCODE'] = dcad_df['PROPERTY_ZIPCODE'].astype(str).str[:5]
dcad_df.head(5)

Unnamed: 0,PROPERTY_ZIPCODE,TOT_VAL,SPTD_CODE,ZIPCODE
0,750513060,103500,A11,75051
1,750513040,145500,A11,75051
2,750502208,168040,A11,75050
3,750502277,200040,A11,75050
4,750617840,151880,A11,75061


In [6]:
#Group Property Values by Zipcode
zip_group = dcad_df.groupby('ZIPCODE')['TOT_VAL']
zip_group.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
ZIPCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
75001,1989.0,3.152085e+05,9.708673e+04,100.0,269610.0,331790.0,377690.0,640000.0
75006,11095.0,2.027066e+05,7.757599e+04,160.0,147125.0,191440.0,256520.0,631690.0
75007,583.0,2.004114e+05,5.800278e+04,1800.0,168440.0,196510.0,238275.0,369430.0
75019,11346.0,4.157977e+05,1.777850e+05,110.0,297412.5,385400.0,490000.0,2650000.0
75038,2985.0,3.584369e+05,3.124792e+05,100.0,163300.0,254610.0,434430.0,2526210.0
75039,2333.0,4.344581e+05,1.701174e+05,3200.0,316510.0,418640.0,505880.0,1279790.0
75040,17323.0,1.667878e+05,5.966218e+04,200.0,121980.0,161920.0,207470.0,456260.0
75041,7639.0,1.399951e+05,4.764328e+04,230.0,109055.0,132000.0,161420.0,457860.0
75042,8297.0,1.543849e+05,3.574186e+04,130.0,132860.0,150950.0,171340.0,534130.0
75043,16969.0,1.739277e+05,6.973162e+04,100.0,135200.0,164330.0,204570.0,1260000.0


In [7]:
zip_group_df = dcad_df.groupby('ZIPCODE')['TOT_VAL'].agg(('count', 'mean'))\
                    .rename(columns={'count':'PropertyValueCount', 'mean':'MeanPropertyValue'})\
                    .sort_values(by='MeanPropertyValue', ascending=False)\
                    .reset_index()

In [8]:
#Add columns to DataFrame to store business data
zip_group_df["MeanPropertyDollarValue"] = zip_group_df["MeanPropertyValue"].map("${:,.0f}".format)
zip_group_df['Lat'] = "" 
zip_group_df["Lng"] = ""
zip_group_df["City"] = ""
zip_group_df["State"] = ""
zip_group_df = zip_group_df.rename(columns={"ZIPCODE": "Zipcode"})

In [9]:
zip_group_df.head()

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State
0,75205,6415,1606362.0,"$1,606,362",,,,
1,75225,7633,1207824.0,"$1,207,824",,,,
2,75201,1326,966758.0,"$966,758",,,,
3,75209,5424,723055.2,"$723,055",,,,
4,75230,9337,704637.8,"$704,638",,,,


In [10]:
# create a params dict that will be updated with new zipcode each iteration
params = {"key": api_key}

# Loop through the zipcode pd's and run a lat/long search for each
for index, row in zip_group_df.iterrows():
    base_geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

    zipcode = row['Zipcode']

    # update address key value to zipcode
    params['address'] = zipcode

    # make request
    zips_lat_lng = requests.get(base_geocode_url, params=params)
    
    # convert to json
    zips_lat_lng = zips_lat_lng.json()
    #print(json.dumps(zips_lat_lng, indent=4, sort_keys=True))
    
    #loop through address to find city
    for i in zips_lat_lng['results'][0]['address_components']:
        if i['types'][0] == 'locality':
            zip_group_df.loc[index, "City"] = i['long_name']
    
    #loop though address to find state
    for i in zips_lat_lng['results'][0]['address_components']:
        if i['types'][0] == 'administrative_area_level_1':
            zip_group_df.loc[index, "State"] = i['short_name']
    
    #Update Lat, Long in Dataframe
    zip_group_df.loc[index, "Lat"] = zips_lat_lng["results"][0]["geometry"]["location"]["lat"]
    zip_group_df.loc[index, "Lng"] = zips_lat_lng["results"][0]["geometry"]["location"]["lng"]

In [11]:
#visualize to lat/lng have been populated
zip_group_df.head()

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State
0,75205,6415,1606362.0,"$1,606,362",32.8326,-96.7976,Dallas,TX
1,75225,7633,1207824.0,"$1,207,824",32.8695,-96.7896,Dallas,TX
2,75201,1326,966758.0,"$966,758",32.7863,-96.7963,Dallas,TX
3,75209,5424,723055.2,"$723,055",32.8539,-96.819,Dallas,TX
4,75230,9337,704637.8,"$704,638",32.9005,-96.7869,Dallas,TX


In [12]:
#Google Places Types: https://developers.google.com/places/web-service/supported_types

#The types list can include multiple categories. A count and mean rating will be added to the final DataFrame for each type
types = ['bank','supermarket','hospital', 'cafe']

bank_names = ['bank', 'credit union']
not_supermarket_names = ['dollar', 'cents', 'liquor', 'convenience']
hospital_names = ['hospital', 'medical center', 'parkland']
cafe_names = ['starbucks']

In [16]:
# params dictionary to update each iteration
for each_type in types:
    
    #clear console before starting a new type
    clear_output()
    
    params = {
        #3 mi radius. A Zipcode is not returned in the results, so we cannot 
        #match against our zipcode without doing a reverse lookup for every result
        "radius": 4828,
        "types": each_type,
        "key": api_key,
    }
    
    if each_type == 'cafe':
        params["keyword"] = 'starbucks' 
    else: 
        params["keyword"] = None
    
    #variables for the specific column names for the business type we are searching
    count_column = f"{each_type}_count"
    rating_column = f"{each_type}_rating"
    
    #add columns for each type we looking up
    zip_group_df[count_column] = 0
    zip_group_df[rating_column] = 0

   
    # Use the lat/lng we recovered to search for businesses
    for index, row in zip_group_df.iterrows():

        rating_sum = 0
        rating_count = 0
        business_count = 0

        next_page_token = "" #initialize the page token

        # get lat, lng from df
        lat = row["Lat"]
        lng = row["Lng"]

        # change location each iteration while leaving original params in place
        #params["location"] = f"32.8326,-96.7976" #testing 1 lat, lng
        params["location"] = f"{lat},{lng}"

        while next_page_token != None:
            
            params["pagetoken"] = next_page_token
            
            #call places function
            findPlaces(params)

            #Loop through results list to count each business and get rating for each 
            for each_result in results_list:
                
                #get name in lower case
                business_name_lower = each_result["name"].lower()

                #check to make sure a Rating since not all business have a rating
                if each_result.get("rating") != None:
                    
                    if each_type == "bank" and any(n in business_name_lower for n in bank_names):
                        business_count+=1
                        rating = each_result["rating"]
                        rating_sum += rating                        
                    
                    if each_type == "hospital" and any(n in business_name_lower for n in hospital_names):
                        business_count+=1
                        rating = each_result["rating"]
                        rating_sum += rating     
                    
                    if each_type == "supermarket" and not(any(n in business_name_lower for n in not_supermarket_names)):
                        business_count+=1
                        rating = each_result["rating"]
                        rating_sum += rating
                        
                    if each_type == "cafe" and any(n in business_name_lower for n in cafe_names):
                        business_count+=1
                        rating = each_result["rating"]
                        rating_sum += rating                             
                                
                    #for printing to console
                    info = f'{row["Zipcode"]} | {each_result["name"]} | location: ({round(each_result["geometry"]["location"]["lat"],2)}, {round(each_result["geometry"]["location"]["lng"],2)}) | rating: {each_result.get("rating",0)}'
                    print(info)
            
            #Set Mean Rating to None if no businesses found
            try:
                mean_rating = rating_sum / business_count
                zip_group_df.loc[index, rating_column] = mean_rating
            except(ZeroDivisionError):
                zip_group_df.loc[index, rating_column] = None
                
            #Set business count in dataframe
            zip_group_df.loc[index, count_column] = business_count
            
        print(f'\nTotal number of {each_type}s found for {row["Zipcode"]}: {business_count} | rating_sum: {round(rating_sum,2)} | avg_rating: {round(mean_rating,2)}')
        print(f'------------------------------------------------------------------------------------------------------\n')

75205 | Starbucks | location: (32.84, -96.78) | rating: 4.1
75205 | Starbucks | location: (32.82, -96.79) | rating: 4.2
75205 | Starbucks | location: (32.84, -96.78) | rating: 4.2
75205 | Starbucks | location: (32.86, -96.77) | rating: 4.3
75205 | Starbucks | location: (32.85, -96.77) | rating: 3.8
75205 | Starbucks | location: (32.81, -96.8) | rating: 4.1
75205 | Starbucks | location: (32.85, -96.77) | rating: 4.1
75205 | Starbucks | location: (32.79, -96.8) | rating: 4.8
75205 | Starbucks | location: (32.86, -96.78) | rating: 3.4
75205 | Starbucks | location: (32.87, -96.77) | rating: 4.4
75205 | Starbucks | location: (32.84, -96.8) | rating: 4.3
75205 | Starbucks | location: (32.87, -96.76) | rating: 4.3
75205 | Starbucks | location: (32.87, -96.77) | rating: 3.9
75205 | Starbucks | location: (32.87, -96.81) | rating: 4.3
75205 | Starbucks at SMU Fondren Library | location: (32.84, -96.78) | rating: 3.8
75205 | Starbucks | location: (32.86, -96.75) | rating: 3.5
75205 | Starbucks | 

75230 | Starbucks | location: (32.9, -96.74) | rating: 4.1
75230 | Starbucks | location: (32.91, -96.74) | rating: 3.4
75230 | Starbucks | location: (32.91, -96.77) | rating: 4.1
75230 | Starbucks | location: (32.93, -96.77) | rating: 4.2
75230 | Starbucks | location: (32.92, -96.82) | rating: 4
75230 | Starbucks | location: (32.9, -96.8) | rating: 4.3
75230 | Starbucks | location: (32.86, -96.77) | rating: 4.3
75230 | Starbucks | location: (32.91, -96.8) | rating: 4.3
75230 | starbucks | location: (32.88, -96.73) | rating: 3.9
75230 | Starbucks | location: (32.87, -96.75) | rating: 3.8
75230 | Starbucks | location: (32.86, -96.78) | rating: 3.4
75230 | Starbucks | location: (32.87, -96.77) | rating: 4.4
75230 | Starbucks | location: (32.87, -96.76) | rating: 4.3
75230 | Starbucks | location: (32.87, -96.77) | rating: 3.9
75230 | Starbucks | location: (32.87, -96.81) | rating: 4.3
75230 | Starbucks | location: (32.93, -96.8) | rating: 4
75230 | Starbucks | location: (32.86, -96.75) | r

75248 | Starbucks | location: (32.95, -96.77) | rating: 4.2
75248 | Starbucks | location: (32.98, -96.77) | rating: 4
75248 | Starbucks | location: (32.96, -96.79) | rating: 4.1
75248 | Starbucks | location: (32.95, -96.74) | rating: 3.8
75248 | Starbucks | location: (32.98, -96.77) | rating: 4.2
75248 | Starbucks | location: (32.93, -96.8) | rating: 4
75248 | Starbucks | location: (32.93, -96.82) | rating: 4.3
75248 | Starbucks | location: (32.93, -96.82) | rating: 4.2
75248 | Starbucks | location: (32.98, -96.77) | rating: 3.7
75248 | Starbucks | location: (32.95, -96.81) | rating: 4
75248 | Starbucks | location: (32.99, -96.75) | rating: 3.9
75248 | Starbucks | location: (32.95, -96.82) | rating: 4.2
75248 | Starbucks | location: (33.0, -96.79) | rating: 4.1
75248 | Starbucks | location: (33.0, -96.73) | rating: 4.4
75248 | Starbucks | location: (33.0, -96.83) | rating: 4.1
75248 | Starbucks / Rims Café | location: (32.96, -96.82) | rating: 2.9
75248 | Starbucks | location: (33.02, 

75038 | Starbucks | location: (32.84, -96.98) | rating: 3.9
75038 | Starbucks | location: (32.84, -96.99) | rating: 3.4
75038 | Starbucks | location: (32.87, -97.01) | rating: 3.5
75038 | Starbucks | location: (32.91, -96.96) | rating: 2.9
75038 | Starbucks | location: (32.86, -96.96) | rating: 3.9
75038 | Starbucks | location: (32.89, -96.96) | rating: 4.1
75038 | Starbucks | location: (32.87, -97.01) | rating: 2.8
75038 | Starbucks | location: (32.91, -96.96) | rating: 3.5
75038 | Starbucks | location: (32.91, -97.04) | rating: 4
75038 | Starbucks | location: (32.87, -97.04) | rating: 3.9
75038 | Starbucks | location: (32.9, -97.04) | rating: 2.8
75038 | Starbucks | location: (32.91, -97.04) | rating: 3.4
75038 | Starbucks | location: (32.85, -97.04) | rating: 4.3
75038 | Starbucks | location: (32.9, -97.04) | rating: 3.2
75038 | Starbucks | location: (32.9, -97.04) | rating: 3.6
75038 | Starbucks | location: (32.87, -97.04) | rating: 3.6
75038 | Starbucks | location: (32.89, -97.03)

75208 | Starbucks | location: (32.78, -96.8) | rating: 4.3
75208 | Starbucks | location: (32.74, -96.86) | rating: 3.5
75208 | Starbucks | location: (32.78, -96.8) | rating: 3.9
75208 | Starbucks | location: (32.78, -96.8) | rating: 4.1
75208 | Starbucks | location: (32.79, -96.8) | rating: 4.1
75208 | Starbucks | location: (32.78, -96.8) | rating: 4.3
75208 | Starbucks | location: (32.76, -96.9) | rating: 4
75208 | Starbucks | location: (32.79, -96.8) | rating: 3.2
75208 | Starbucks | location: (32.79, -96.81) | rating: 4.3
75208 | Urban Coffee | location: (32.78, -96.8) | rating: 4.4
75208 | Coffee's Post | location: (32.78, -96.81) | rating: 4.3
75208 | The Cafe At Bryan Tower | location: (32.79, -96.8) | rating: 3.2

Total number of cafes found for 75208: 9 | rating_sum: 35.7 | avg_rating: 3.97
------------------------------------------------------------------------------------------------------

75287 | Starbucks | location: (33.03, -96.84) | rating: 4.2
75287 | Starbucks | locati

75202 | Starbucks | location: (32.8, -96.83) | rating: 3.5
75202 | Starbucks | location: (32.79, -96.81) | rating: 4.3
75202 | Starbucks | location: (32.74, -96.86) | rating: 3.5
75202 | Columbus Square | location: (32.8, -96.8) | rating: 4.3
75202 | Urban Coffee | location: (32.78, -96.8) | rating: 4.4
75202 | Coffee's Post | location: (32.78, -96.81) | rating: 4.3

Total number of cafes found for 75202: 23 | rating_sum: 91.0 | avg_rating: 3.96
------------------------------------------------------------------------------------------------------

75088 | Starbucks | location: (32.91, -96.56) | rating: 3.9
75088 | Starbucks | location: (32.91, -96.58) | rating: 4.2
75088 | Starbucks | location: (32.92, -96.52) | rating: 5
75088 | Starbucks | location: (32.92, -96.52) | rating: 4.3

Total number of cafes found for 75088: 4 | rating_sum: 17.4 | avg_rating: 4.35
------------------------------------------------------------------------------------------------------

75231 | Starbucks | loca

75246 | Starbucks | location: (32.81, -96.73) | rating: 4.2
75246 | Starbucks | location: (32.8, -96.79) | rating: 2
75246 | Starbucks | location: (32.81, -96.8) | rating: 4.1
75246 | Starbucks | location: (32.79, -96.8) | rating: 4.8
75246 | Starbucks | location: (32.84, -96.78) | rating: 4.1
75246 | Starbucks | location: (32.78, -96.8) | rating: 3.9
75246 | Starbucks | location: (32.79, -96.78) | rating: 3.7
75246 | Starbucks | location: (32.82, -96.79) | rating: 4.2
75246 | Starbucks | location: (32.81, -96.79) | rating: 3.6
75246 | Starbucks | location: (32.8, -96.79) | rating: 4.3
75246 | Starbucks | location: (32.84, -96.78) | rating: 4.5
75246 | Starbucks | location: (32.84, -96.75) | rating: 4.1
75246 | Starbucks | location: (32.84, -96.78) | rating: 4.2
75246 | Starbucks | location: (32.82, -96.8) | rating: 4.3
75246 | Starbucks | location: (32.79, -96.75) | rating: 3.6
75246 | Starbucks | location: (32.8, -96.8) | rating: 4.4
75246 | Starbucks | location: (32.84, -96.77) | ra

75226 | Starbucks | location: (32.81, -96.83) | rating: 3.9
75226 | Starbucks | location: (32.79, -96.8) | rating: 3.6
75226 | Starbucks | location: (32.8, -96.83) | rating: 3.5
75226 | Starbucks | location: (32.79, -96.75) | rating: 3.6
75226 | Starbucks | location: (32.79, -96.81) | rating: 4.3
75226 | The Cafe At Bryan Tower | location: (32.79, -96.8) | rating: 3.2
75226 | Columbus Square | location: (32.8, -96.8) | rating: 4.3
75226 | Urban Coffee | location: (32.78, -96.8) | rating: 4.4
75226 | Coffee's Post | location: (32.78, -96.81) | rating: 4.3
75226 | Counter Offer | location: (32.8, -96.83) | rating: 2.8

Total number of cafes found for 75226: 25 | rating_sum: 99.5 | avg_rating: 3.98
------------------------------------------------------------------------------------------------------

75236 | Starbucks | location: (32.65, -96.92) | rating: 3.9
75236 | Starbucks | location: (32.71, -96.95) | rating: 5

Total number of cafes found for 75236: 2 | rating_sum: 8.9 | avg_rating:

75212 | Starbucks | location: (32.82, -96.87) | rating: 4.1
75212 | Starbucks | location: (32.74, -96.86) | rating: 3.5
75212 | Starbucks | location: (32.76, -96.9) | rating: 4
75212 | Starbucks | location: (32.83, -96.85) | rating: 4.5
75212 | Starbucks Inside Childrens 2350 | location: (32.81, -96.84) | rating: 0

Total number of cafes found for 75212: 5 | rating_sum: 16.1 | avg_rating: 3.22
------------------------------------------------------------------------------------------------------


Total number of cafes found for 75217: 0 | rating_sum: 0 | avg_rating: 3.22
------------------------------------------------------------------------------------------------------

75203 | Starbucks | location: (32.78, -96.8) | rating: 4.3
75203 | Starbucks | location: (32.78, -96.8) | rating: 4.1
75203 | Starbucks | location: (32.78, -96.8) | rating: 4.3
75203 | Starbucks | location: (32.78, -96.8) | rating: 3.9
75203 | Starbucks | location: (32.79, -96.8) | rating: 4.1
75203 | Starbucks | loc

In [17]:
#create file to reference in visualizations (2_visualize-property-values-and-businesses.ipynb) so we don't have to make api calls during presentation
#zip_group_df.to_csv('Resources/zip_group_csv.csv')

In [20]:
zip_group_df.head()

Unnamed: 0,Zipcode,PropertyValueCount,MeanPropertyValue,MeanPropertyDollarValue,Lat,Lng,City,State,bank_count,bank_rating,supermarket_count,supermarket_rating,hospital_count,hospital_rating,cafe_count,cafe_rating
0,75205,6415,1606362.0,"$1,606,362",32.8326,-96.7976,Dallas,TX,41,3.595122,13,4.415385,15,3.946667,46,3.819565
1,75225,7633,1207824.0,"$1,207,824",32.8695,-96.7896,Dallas,TX,37,3.483784,8,4.3625,5,4.16,28,3.903571
2,75201,1326,966758.0,"$966,758",32.7863,-96.7963,Dallas,TX,35,3.477143,12,4.208333,14,3.714286,26,4.003846
3,75209,5424,723055.2,"$723,055",32.8539,-96.819,Dallas,TX,35,3.482857,11,4.172727,9,4.111111,33,3.960606
4,75230,9337,704637.8,"$704,638",32.9005,-96.7869,Dallas,TX,40,3.3725,6,4.166667,14,3.628571,21,4.071429
