In [1]:
import os
import urllib.parse

import folium
import googlemaps
import numpy as np
import pandas as pd
import requests
from geographiclib.constants import Constants
from geographiclib.geodesic import Geodesic
geod = Geodesic.WGS84 

import warnings
warnings.filterwarnings("ignore")

# Load the crime incident export (path is relative to the notebook's working directory).
df = pd.read_csv('Boston_crimes_data.csv')

# Drop the 'Unnamed: 0' column (presumably an index column saved with the CSV -- TODO confirm).
df = df.drop(columns=['Unnamed: 0'])

# Lower-case keywords; an incident is kept when its description contains any of them.
req_offense_categories = ['robbery','purse','pocket','threat','drug','danger','fraud','murder','vandalism',
 'burglar','weapon','extortion','kidnap','bomb','arson','prostitution','explosive','breaking']

# Keep only the three most recent years used by the analysis.
df = df[df['YEAR'].isin([2020, 2021, 2022])]

# The keywords are joined into one regex alternation. na=False treats a missing
# description as "no match"; without it the mask contains NaN and the boolean
# indexing below raises instead of filtering.
df = df[df['OFFENSE_DESCRIPTION'].str.lower().str.contains('|'.join(req_offense_categories), na=False)]

# .copy() detaches the result from the filtered views so the column assignment
# below writes to an independent frame.
df = df.drop_duplicates().copy()

# Weekday flag: 1 for Monday-Thursday and for Friday before 17:00, otherwise 0.
df['Weekday'] = np.where(
    df['DAY_OF_WEEK'].isin(['Monday','Tuesday','Wednesday','Thursday'])
    | ((df['DAY_OF_WEEK'] == 'Friday') & (df['HOUR'] < 17)),
    1, 0)


In [39]:
# Google Maps client used by get_directions. The API key is read from the
# GOOGLE_MAPS_API_KEY environment variable so a real key never has to be saved
# inside the notebook; the original placeholder is kept as the fallback value.
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', 'Insert Your Key'))

def return_lat_long(address):
    """Geocode a free-form address with the public Nominatim search API.

    Parameters
    ----------
    address : str
        Address text to look up.

    Returns
    -------
    tuple
        ``(lat, lon)`` taken from the first match, exactly as the service
        returns them in its JSON payload.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    IndexError
        If the service returns no match for ``address``.
    """
    response = requests.get(
        # Query-string form of the endpoint; the address goes in ``q`` and
        # requests takes care of URL-encoding it.
        'https://nominatim.openstreetmap.org/search',
        params={'q': address, 'format': 'json', 'limit': 1},
        # Nominatim's usage policy asks clients to identify themselves.
        headers={'User-Agent': 'boston-safe-route-notebook'},
        # Never hang the notebook on an unresponsive server.
        timeout=10,
    )
    response.raise_for_status()
    matches = response.json()
    if not matches:
        # Same exception type the bare ``[0]`` lookup produced, but with context.
        raise IndexError('No geocoding result for address: ' + repr(address))
    return (matches[0]["lat"], matches[0]["lon"])

def get_directions(start,end,mode):
    """Request route alternatives between two points from the Google Maps client.

    Parameters
    ----------
    start, end
        Origin and destination, forwarded unchanged to ``gmaps.directions``.
    mode : str
        Travel mode forwarded to the Directions request. An empty or ``None``
        value falls back to ``'walking'``, the mode this function previously
        always used regardless of the argument.

    Returns
    -------
    Whatever ``gmaps.directions`` returns; ``alternatives=True`` asks the
    service for more than one route when available.
    """
    return gmaps.directions(start, end, mode=mode or 'walking', alternatives=True)



def create_map(index):
    """Draw one route and the crimes found along it on a folium map.

    Reads the module-level ``coordinates`` and ``crime_data`` mappings (the two
    values returned by ``safety_indicator.main``).

    Parameters
    ----------
    index
        Key of the route to draw; must exist in both ``coordinates`` and
        ``crime_data``.

    Returns
    -------
    folium.Map
        Map with markers on the first and last route point, one red marker per
        crime row (popup = its ``OFFENSE_DESCRIPTION``), the route polyline, and
        the viewport fitted to the route.
    """
    # Route points are (lat, lng) pairs, the order folium expects.
    route_points = [(pt[0], pt[1]) for pt in coordinates[index]]
    m = folium.Map(zoom_start = 14,control_scale = True)

    # Markers for the start and end of the route.
    for endpoint in (route_points[0], route_points[-1]):
        folium.Marker(endpoint).add_to(m)

    # One red marker per crime found near this route.
    for _, row in crime_data[index].iterrows():
        folium.Marker((row['Lat'],row['Long']), popup = str(row['OFFENSE_DESCRIPTION']),icon=folium.Icon(color='red')).add_to(m)

    # The route itself.
    folium.PolyLine(route_points, weight=5, opacity=1).add_to(m)

    # Fit the viewport to the route being drawn. The bounds are taken from the
    # selected route (not always route 0) and are passed as [lat, lng] corners.
    lats = [lat for lat, _ in route_points]
    lngs = [lng for _, lng in route_points]
    m.fit_bounds([[min(lats), min(lngs)], [max(lats), max(lngs)]])
    return m

class safety_indicator:
    """Find the recorded crimes that happened close to each candidate route.

    Parameters
    ----------
    start, end
        Trip endpoints; stored on the instance but not used by the methods here.
    data
        Iterable of route dicts. Each route is read as
        ``route['legs'][i]['steps'][j]`` with ``distance['value']``,
        ``start_location`` and ``end_location`` (``lat``/``lng``) per step.
    hour : int
        Hour of travel; crimes are matched within one hour either side of it.
    df : pandas.DataFrame
        Crime table. The methods read the columns ``HOUR``, ``Weekday``,
        ``Lat``, ``Long`` and ``OFFENSE_DESCRIPTION``.
    """

    def __init__(self,start,end,data,hour,df):
        self.start = start
        self.end = end
        self.hour = hour
        self.data = data
        self.df = df
        # Both dicts are keyed by the position of the route inside ``data``.
        self.route_coordinates = {}
        self.route_crimes = {}

    def main(self):
        """Process every route, print a one-line summary for each and return the results.

        Returns
        -------
        tuple of dict
            ``(route_coordinates, route_crimes)``: sampled points per route and
            the DataFrame of crimes found near those points.
        """
        for index, route in enumerate(self.data):
            self.route_coordinates[index] = self.route_coordinates_indicator(route)
            self.route_crimes[index] = self.identify_crimes(self.route_coordinates[index])
            print("For the Route:: ", index," the total number of Crimes that happened on the way is:: ",self.route_crimes[index].shape[0],
                 " At the Hour:: ",self.hour, "Total different types of Crimes that happened:: ",self.route_crimes[index]['OFFENSE_DESCRIPTION'].unique())
        return self.route_coordinates, self.route_crimes

    def route_coordinates_indicator(self, route, spacing=100):
        """Turn a route into a list of (lat, lng) sample points.

        Steps no longer than ``spacing`` contribute their start and end point.
        Longer steps additionally get points every ``spacing`` units along the
        initial bearing from the step's start towards its end.

        NOTE(review): the number of intermediate points is derived from the
        step's reported ``distance['value']``, not from the straight-line
        distance between its endpoints, so on a winding step the last samples
        may lie beyond the end point -- confirm whether that is acceptable.

        Parameters
        ----------
        route : dict
            One route with ``legs`` -> ``steps`` as described on the class.
        spacing : int, optional
            Distance between intermediate samples, in the unit of the step
            distances (presumably metres). Defaults to 100.
        """
        sampled_points = []
        for leg in route['legs']:
            for step in leg['steps']:
                step_distance = step['distance']['value']
                lat, lng = step['start_location']['lat'], step['start_location']['lng']
                end_lat, end_lng = step['end_location']['lat'], step['end_location']['lng']

                sampled_points.append((lat, lng))
                if step_distance > spacing:
                    bearing = self.get_bearing(lat, lng, end_lat, end_lng)
                    # Walk forward from the previous sample, one `spacing` at a time.
                    for _ in range(int(step_distance // spacing)):
                        lat, lng = self.getEndpoint(lat, lng, bearing, spacing)
                        sampled_points.append((lat, lng))
                sampled_points.append((end_lat, end_lng))

        return sampled_points

    def identify_crimes(self, total_route_coordinates, radius=50):
        """Collect the distinct crimes within ``radius`` of any sampled route point.

        Parameters
        ----------
        total_route_coordinates : iterable of (lat, lng)
            Points produced by ``route_coordinates_indicator``.
        radius : float, optional
            Search radius around each point, in metres. Defaults to 50.

        Returns
        -------
        pandas.DataFrame
            Matching rows of ``self.df`` plus a ``Within_distance`` column,
            with duplicates removed. Empty (but with all columns present) when
            nothing matches or no points were given.
        """
        # The hour filter does not depend on the point, so apply it once up front
        # instead of re-scanning the full table for every sample.
        candidates = self.df[self.df['HOUR'].isin(self._hour_window(self.hour))]

        matches = [
            self.return_number_events(candidates, '', self.hour, point, radius)
            for point in total_route_coordinates
        ]
        matches = [frame for frame in matches if not frame.empty]

        if not matches:
            # Keep the column layout so callers can still select columns.
            empty = self.df.iloc[0:0].copy()
            empty['Within_distance'] = np.array([], dtype=int)
            return empty

        # Single concat instead of growing a frame with the removed DataFrame.append.
        return pd.concat(matches).drop_duplicates()

    def get_bearing(self,lat1, long1, lat2, long2):
        """Return the initial azimuth (``azi1``) of the WGS84 geodesic from point 1 to point 2."""
        return Geodesic.WGS84.Inverse(lat1, long1, lat2, long2)['azi1']

    def getEndpoint(self,lat1, lon1, bearing, d):
        """Return the (lat, lon) reached by travelling ``d`` from the point along ``bearing``."""
        # Reuse the shared WGS84 ellipsoid rather than constructing one per call.
        target = Geodesic.WGS84.Direct(lat1, lon1, bearing, d)
        return (target['lat2'], target['lon2'])

    @staticmethod
    def _hour_window(hour):
        """Return the hour before, the hour itself and the hour after, wrapped to 0-23."""
        return [(hour - 1) % 24, hour % 24, (hour + 1) % 24]

    @staticmethod
    def _weekday_flag(day, hour):
        """Return 1 for Monday-Thursday and for Friday before 17:00, else 0."""
        day = day.strip().lower()
        if day in ('monday', 'tuesday', 'wednesday', 'thursday'):
            return 1
        if day == 'friday' and hour < 17:
            return 1
        return 0

    def return_number_events(self,df,day,hour,coordinates,distance):
        """Return the crimes in ``df`` near a point around a given hour.

        Parameters
        ----------
        df : pandas.DataFrame
            Crime table with ``HOUR``, ``Lat``, ``Long`` (and ``Weekday`` when
            ``day`` is given).
        day : str
            Day name, or ``''`` to ignore the weekday/weekend split. When given,
            only rows whose ``Weekday`` flag matches that day are kept.
        hour : int
            Rows are kept when ``HOUR`` is this hour or one hour either side,
            wrapping around midnight.
        coordinates : sequence
            ``(lat, lng)`` of the point of interest.
        distance : float
            Maximum geodesic distance from the point, in metres.

        Returns
        -------
        pandas.DataFrame
            The matching rows with an added ``Within_distance`` column (always 1).
        """
        mask = df['HOUR'].isin(self._hour_window(hour))
        if day:
            # The weekday filter only makes sense when a day was supplied; an
            # empty day means "any day".
            mask = mask & (df['Weekday'] == self._weekday_flag(day, hour))

        # Copy so that adding a column never writes into a view of the caller's frame.
        nearby = df[mask].copy()

        lat0, lng0 = coordinates[0], coordinates[1]
        metres = [
            Geodesic.WGS84.Inverse(lat, lng, lat0, lng0)['s12']
            for lat, lng in zip(nearby['Lat'], nearby['Long'])
        ]
        # Building the flag from a plain list also works when no row survived
        # the filters above.
        nearby['Within_distance'] = (np.array(metres, dtype=float) < distance).astype(int)
        return nearby[nearby['Within_distance'] == 1]


In [6]:
# Geocode the trip endpoints; each call returns a (lat, lon) pair.
start = return_lat_long('55 traveler Street, Boston, MA 02118')
end   = return_lat_long('25 Thomson Pl, Boston, MA 02210')

# Request directions between the two points, passing 'walking' as the travel mode.
data = get_directions(start,end,'walking')

# Build the analyser from the directions result, hour 19 and the filtered crime table `df`.
safety = safety_indicator(start,end,data,19,df)

# main() returns two dicts keyed by route position: sampled route points and nearby crimes.
# create_map reads both as module-level globals, so these names must not change.
coordinates,crime_data = safety.main()


In [38]:
# Draw the route stored under key 1 of `coordinates` / `crime_data`;
# the lookup fails if that key does not exist in both.
m = create_map(1)

# Bare last expression so the notebook displays the map.
m

### Survivorship bias: understanding the population travelling through these areas could provide more insight into what to do next. ###