In [6]:
####################################################Importing Dependencies##################################################### 
###############################################################################################################################

import requests
import numba as nb
# For getting the distance between two coordinates
from math import radians, cos, sin, asin, sqrt
# from shapely import geometry
# from scipy import stats
import json
import pandas as pd
import numpy as np
# normalize the json data and place it in a Dataframe
from pandas.io.json import json_normalize
# Saving gmaps to html
from ipywidgets.embed import embed_minimal_html
# Display html content
from IPython.display import IFrame
from IPython.core.display import display
import gmaps
import gmaps.datasets
import os
import time
# Google API Key
#from config import gkey

In [7]:
#Using the haversine function by passing the Coordinates of the Attractions VS the Crime Coordinates
def haversine(lon1, lat1, lon2, lat2):
   
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 3956 # Radius of earth in miles.
    return c * r

In [8]:
####################################################CSV Data Extraction and Cleanup############################################ 
###############################################################################################################################
gkey = "AIzaSyAmpcf4PCVP54Sw3R_macfn8oA00_jaeGc"
def attraction_map(df, gkey='', years =[]):

    #Data Cleanup
    print('1. start  data clean ..... ', time.asctime(time.localtime(time.time())) )
    selected_columns = ['ID','Date','Primary Type','Arrest','Latitude','Longitude']
    chicago_selected_df = df.loc[:, selected_columns]

    #Drop the blank values
    chicago_cleaned_df = chicago_selected_df.dropna(axis= 0, how = 'any')

    #Sorting values by ID and dropping the index
    #chicago_cleaned_df = chicago_cleaned_df.sort_values(by=['ID'], ascending=False)

    chicago_cleaned_df.reset_index(drop = True, inplace = True)

    # #Cleaning up the data using the date
    chicago_cleaned_df.Date = pd.to_datetime(chicago_cleaned_df.Date)

    # # #Getting the Year from the date
    chicago_cleaned_df["Year"] = chicago_cleaned_df["Date"].dt.to_period('Y').apply(str)

    # # # Remove date before 2009
    chicago_cleaned_df = chicago_cleaned_df.loc[(chicago_cleaned_df["Year"] == years[0]) | \
                                                (chicago_cleaned_df["Year"] == years[1]) , : ]
    ########################Geocoordinates (latitude and longitude) of Chicago,Illinois from Google API###############
    ##################################################################################################################
    print('2. start  Geocoordinates from Google ..... ', time.asctime(time.localtime(time.time())) )
    target_city = "Chicago, Illinois"

    params = {"address": target_city, "key": gkey}

    # Build URL using the Google Maps API
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"

    # Run request
    response_coor = requests.get(base_url, params=params)

    # Convert to JSON
    chicago_geo = response_coor.json()

    #print(json.dumps(chicago_geo , indent=4, sort_keys=True))
    # Extract lat/lng
    chicago_lat = chicago_geo["results"][0]["geometry"]["location"]["lat"]
    chicago_lng = chicago_geo["results"][0]["geometry"]["location"]["lng"]
    ########################Get the points of interest from Chicago Citys from Google "Text Search" API#################
    print('3. start Get the points of interest from Google ..... ', time.asctime(time.localtime(time.time())) )
    # Build URL using the Google Maps API
    base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

    search_key = "chicago tourist interests"

    language = "en"

    # rewrite the params in a dict

    params = {
        "query": search_key,
        "language":language,
        "key": gkey
    }

    print("Popular Attractions in Chicago, IL")

    # Run a get request with the parameters
    response = requests.get(base_url, params)

    chicago_attractions = response.json()

    json_size = len(chicago_attractions["results"])

    attractions_chicago = json_normalize(chicago_attractions['results'])

    columns_attr = ['name','geometry.location.lat','geometry.location.lng','formatted_address','place_id','rating']
    attractions_df = attractions_chicago.loc[:, columns_attr]
    attractions_df = attractions_df.rename(columns={"geometry.location.lat":"Attr Latitude",\
                                                    "geometry.location.lng":"Attr Longitude","place_id":"Place ID"})
    ####################################Calculate distance between two points on the earth#############################
    print('4. start Calculate distance between two points ..... ', time.asctime(time.localtime(time.time())) )
    #Initialize lists to get values assigned
    final_latitude = []
    final_longitude = []
    attraction_name = []
    crime_type = []
    #Itetrate through the two dataframes (the first one is the actual data with 6 million records)
    for i in range(len(chicago_cleaned_df)):
        for j in range(len(attractions_df)):
            if (haversine(attractions_df["Attr Longitude"][j],attractions_df["Attr Latitude"][j],\
                          chicago_cleaned_df["Longitude"][i],chicago_cleaned_df["Latitude"][i])<= 0.25):
                final_latitude.append(chicago_cleaned_df["Latitude"][i])
                final_longitude.append(chicago_cleaned_df["Longitude"][i])
                attraction_name.append(attractions_df["name"][j])
                crime_type.append(chicago_cleaned_df["Primary Type"][i])
    #Place the values in a dataframe
    filtered_crime_df = pd.DataFrame({"Attraction Name":attraction_name,"Crime Type":crime_type,\
                                      "Lat":final_latitude,"Lon":final_longitude})
    ####################################Building the Dataframe for GMAPS###############################################

    # Grouping by the Crime Data Frame in order to get the Top Crime type and the Total Count of Crimes
    filtered_crime_df= filtered_crime_df.sort_values(by=['Attraction Name'])
    attractions_grouped_counts = filtered_crime_df.groupby(["Attraction Name"])["Crime Type"].\
                                  apply(lambda x: x.value_counts().index[0]).reset_index()
    attractions_grouped_counts = attractions_grouped_counts.sort_values(by=['Attraction Name'])
    attractions_grouped_counts["Crime Count"] = filtered_crime_df.groupby(["Attraction Name"])["Crime Type"]\
                                                .count().tolist()
    attractions_grouped_counts = attractions_grouped_counts.rename(columns={"Crime Type":"Top Crime"})
    attractions_grouped_counts["Second Top Crime"] = \
                        filtered_crime_df.groupby(["Attraction Name"])["Crime Type"].apply(lambda x:\
                        x.value_counts().index[1]).reset_index().sort_values(by=['Attraction Name'])["Crime Type"].tolist()
    attractions_grouped_counts["Attraction Name"] = attractions_grouped_counts["Attraction Name"].astype(str)

    ############################ Merge the two dataframes to get the Attraction related Data ##########################
    cleaned_attractions_df= \
                    attractions_chicago.filter(['name','geometry.location.lat','geometry.location.lng','formatted_address'\
                                                ,'rating','place_id'], axis=1)
    cleaned_attractions_df.columns = ["Attraction Name","Latitude","Longitude","Address","Rating","Place ID"]
    cleaned_attractions_df["Attraction Name"] = cleaned_attractions_df["Attraction Name"].astype(str)
    cleaned_attractions_merged = pd.merge(cleaned_attractions_df, attractions_grouped_counts, on="Attraction Name")
    ########################################Building GMAPS with markers and heat map ##################################
    print('5. start Building GMAPS with markers and heat map..... ', time.asctime(time.localtime(time.time())) )
    # Configure Key
    gmaps.configure(api_key=gkey)

    # Get Chicago City Coordinates from the Geolocation API
    chicago_coordinates = (chicago_lat, chicago_lng)

    # Build the layout for the gmap figure
    figure_layout = {
    'border': '3px solid red',
    'padding': '1px',
    'width': '950px',
    'height': '700px',
    }

    #Plot the gmaps for the city of Chicago
    fig = gmaps.figure(center=chicago_coordinates, zoom_level=13, layout = figure_layout)

    # Line up the data for the marker labels
    # Get the details from the merged datafraome with the final counts
    locations = list(zip(cleaned_attractions_merged["Latitude"],cleaned_attractions_merged["Longitude"]))
    print('6. start plot ..... ', time.asctime(time.localtime(time.time())) )
    # Pull out the data for the Hover and Infobox functionalities
    info_value = []
    hover_value = []
    label_value = []

    # Iterate through the dataset to get the values
    for mc in range(len(cleaned_attractions_merged)):
        hover_value.append("Total Crimes - "+ str(cleaned_attractions_merged["Crime Count"][mc]))
        info_value.append("Beware of "+ '"'+ str(cleaned_attractions_merged["Top Crime"][mc])+ "/" +\
                          str(cleaned_attractions_merged["Second Top Crime"][mc]) + '"'+ " related crimes near this attraction")
        label_value.append(str(cleaned_attractions_merged["Attraction Name"][mc]))

    # Add Marker layer with all the parameters
    markers = gmaps.marker_layer(locations, hover_text=hover_value, info_box_content=info_value, label = label_value)

    fig.add_layer(markers)

    # Get the locations for the heatmap, derived from the Haversine function
    locations_heatmap = list(zip(final_latitude,final_longitude))

    # Add the heatmap
    heatmap_layer = gmaps.heatmap_layer(locations_heatmap,max_intensity=120, point_radius=12.0)

    fig.add_layer(heatmap_layer)

    #fig
    # Add the heatmap
    embed_minimal_html("attraction_data_google_plot.html", views=[fig])
    display(IFrame("attraction_data_google_plot.html", '950px' , '700px'))

In [9]:
def demo():
    import webbrowser
    webbrowser.open_new_tab('attraction_data_google_plot.html')

In [10]:
if __name__ == '__main__':

    file = os.path.join('Resources','Crimes_2001_to_present.csv')
    crime_df = pd.read_csv(file)
    attraction_map(crime_df, gkey='AIzaSyAmpcf4PCVP54Sw3R_macfn8oA00_jaeGc', years=['2018','2019'])
    demo()

1. start  data clean .....  Fri Jul 12 06:58:27 2019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2. start  Geocoordinates from Google .....  Fri Jul 12 07:12:22 2019
3. start Get the points of interest from Google .....  Fri Jul 12 07:12:22 2019
Popular Attractions in Chicago, IL
4. start Calculate distance between two points .....  Fri Jul 12 07:12:23 2019
5. start Building GMAPS with markers and heat map.....  Fri Jul 12 07:17:12 2019
6. start plot .....  Fri Jul 12 07:17:12 2019


error: bad escape \u at position 0