In [114]:
!pip install folium 
import folium
import requests
import pandas as pd
import numpy as np
import random
from folium.map import *



# Part 1: Getting some Data

In [115]:
# The dataset chosen is the Baltimore arrests dataset; it has 353302 rows in total
# and includes Latitude and Longitude columns.
ARRESTS_CSV_URL = 'https://data.baltimorecity.gov/datasets/baltimore::arrests.csv'
data_table = pd.read_csv(ARRESTS_CSV_URL)
print(data_table)

              X        Y   RowID  ArrestNumber   Age Gender Race  \
0      -76.6634  39.2944       1    22000022.0  18.0      M    B   
1      -76.5987  39.3599       2    22000010.0  26.0      M    B   
2           NaN      NaN       3    21168317.0  24.0      M    B   
3           NaN      NaN       4    21168315.0  28.0      M    B   
4           NaN      NaN       5    21168316.0  28.0      M    B   
...         ...      ...     ...           ...   ...    ...  ...   
353297 -76.6616  39.3275  353298    10000039.0  17.0      M    B   
353298 -76.6616  39.3275  353299    10000035.0  68.0      M    B   
353299 -76.5876  39.3125  353300    10000018.0  25.0      M    B   
353300 -76.6837  39.2979  353301    10000013.0  28.0      M    B   
353301 -76.6327  39.3035  353302    10000083.0  21.0      M    B   

                ArrestDateTime      ArrestLocation         IncidentOffence  \
0       2021/12/31 23:14:00+00  2800 EDMONDSON AVE         Unknown Offense   
1       2021/12/31 22:09:00

In [116]:
# Check whether every row has a value for the location columns, and for the
# Race and Gender columns that are used to color the map in Part 3.
# count() already ignores missing values, so no pre-filtering is needed.
# The result is printed because a notebook cell only displays its last
# expression; bare expressions in the middle of a cell are silently discarded.
checked_columns = ["GeoLocation", "Latitude", "Longitude", "Race", "Gender"]
print(data_table[checked_columns].count())

# The counts are lower than the 353302 rows in the table, which means some rows
# still have missing values and need to be dropped.
# A marker needs both coordinates, and the marker/circle colors need Race and
# Gender, so rows missing any of these four values are removed. Latitude is
# listed alongside Longitude because a row with only one coordinate cannot be
# placed on the map.
data_table = data_table.dropna(subset=["Latitude", "Longitude", "Race", "Gender"])

# Save the csv file after dropping the null values. The download URL is not a
# writable destination, so the cleaned copy is written to a local file.
data_table.to_csv('arrests_clean.csv', index=False)


data_table

Unnamed: 0,X,Y,RowID,ArrestNumber,Age,Gender,Race,ArrestDateTime,ArrestLocation,IncidentOffence,IncidentLocation,Charge,ChargeDescription,District,Post,Neighborhood,Latitude,Longitude,GeoLocation,Shape
0,-76.6634,39.2944,1,22000022.0,18.0,M,B,2021/12/31 23:14:00+00,2800 EDMONDSON AVE,Unknown Offense,2800 EDMONDSON AVE,1 0493,HGV,Western,721,Penrose/Fayette Street Outreach,39.2944,-76.6634,"(39.2944,-76.6634)",
1,-76.5987,39.3599,2,22000010.0,26.0,M,B,2021/12/31 22:09:00+00,5700 NORTHWOOD DR,Unknown Offense,5700 NORTHWOOD DR,1 5212,HGV,Northern,523,Chinquapin Park,39.3599,-76.5987,"(39.3599,-76.5987)",
6,-76.6343,39.3124,7,21168306.0,44.0,M,B,2021/12/31 16:16:00+00,2200 LINDEN AVE,Unknown Offense,2200 LINDEN AVE,2 0480,MOTOR VEHICLE UNLAW( STOLEN AU,Central,133,Reservoir Hill,39.3124,-76.6343,"(39.3124,-76.6343)",
9,-76.5609,39.2944,10,21168297.0,36.0,M,U,2021/12/31 14:50:00+00,100 JANNEY ST,Unknown Offense,100 JANNEY ST,1 1415,AGG ASSAULT,Southeast,232,Kresson,39.2944,-76.5609,"(39.2944,-76.5609)",
10,-76.5879,39.3181,11,22000016.0,25.0,M,B,2021/12/31 12:51:00+00,2700 ST. LO DR,Unknown Offense,2700 ST. LO DR,1 5212,HGV,Northeast,431,Clifton Park,39.3181,-76.5879,"(39.3181,-76.5879)",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353297,-76.6616,39.3275,353298,10000039.0,17.0,M,B,2010/01/01 00:05:00+00,2900 NORFOLK AV,54ARMED PERSON,2900 NORFOLK AV,1 5212,HGV,Northwest,612,Park Circle,39.3275,-76.6616,"(39.3275,-76.6616)",
353298,-76.6616,39.3275,353299,10000035.0,68.0,M,B,2010/01/01 00:05:00+00,2900 NORFOLK AV,54ARMED PERSON,2900 NORFOLK AV,,HGV,Northwest,612,Park Circle,39.3275,-76.6616,"(39.3275,-76.6616)",
353299,-76.5876,39.3125,353300,10000018.0,25.0,M,B,2010/01/01 00:01:00+00,1900 COLLINGTON AV,87NARCOTICS,1900 COLLINGTON AV,2A0696,DISTRIBUTION HERION,Eastern,331,South Clifton Park,39.3125,-76.5876,"(39.3125,-76.5876)",
353300,-76.6837,39.2979,353301,10000013.0,28.0,M,B,2010/01/01 00:01:00+00,900 WILDWOOD PW,79OTHER,900 WILDWOOD PW,2 5212,HANDGUN VIOLATION,Southwest,815,Edmondson Village,39.2979,-76.6837,"(39.2979,-76.6837)",


# Part 2: Making a Map

In [117]:
# Build the base map that the arrest markers and circles are later added to.
map_center = [39.29, -76.61]  # initial view; presumably central Baltimore - confirm
map_osm = folium.Map(location=map_center, zoom_start=13)
map_osm

# Part 3: Combining Parts 1 and 2

In [118]:
# List the distinct race codes that occur in the dataset, in sorted order.
race_values = data_table['Race'].tolist()
arr = np.unique(race_values).tolist()
arr

['A', 'B', 'H', 'I', 'U', 'W']

In [119]:
#choose a random sample of arrests from the dataset
#label each race (marker) and gender (circle) with a different color

SAMPLE_SIZE = 200   # number of arrests drawn on the map
SAMPLE_SEED = 42    # fixed seed so a fresh run reproduces the same sample and map

# marker color for each race code present in the dataset
RACE_MARKER_COLORS = {
    'A': 'beige',
    'B': 'black',
    'H': 'orange',
    'I': 'green',
    'U': 'purple',
    'W': 'white',
}


def add_arrest_to_map(row, target_map):
    """Draw one arrest on ``target_map``.

    Adds a marker colored by ``row['Race']`` whose popup lists the arrest
    number, race and age, plus a circle of radius 100 colored by
    ``row['Gender']`` (blue for 'M', red for anything else) whose popup is
    the arrest location.

    Rows whose race code is not a key of ``RACE_MARKER_COLORS`` are skipped,
    so nothing is drawn for them.
    """
    marker_color = RACE_MARKER_COLORS.get(row['Race'])
    if marker_color is None:
        return

    location = [row['Latitude'], row['Longitude']]

    # Popups are rendered as HTML, where a newline character collapses to a
    # space; <br> puts each field on its own line.
    popup_text = "<br>".join([
        "Arrest #: " + str(row['ArrestNumber']),
        "Race: " + row['Race'],
        "Age: " + str(row['Age']),
    ])
    folium.Marker(location=location, popup=popup_text,
                  icon=folium.Icon(color=marker_color)).add_to(target_map)

    circle_color = 'blue' if row['Gender'] == 'M' else 'red'
    folium.Circle(radius=100, location=location, popup=row['ArrestLocation'],
                  color=circle_color, fill=True).add_to(target_map)


# never ask for more rows than the table holds
arrest_sample = data_table.sample(n=min(SAMPLE_SIZE, len(data_table)),
                                  random_state=SAMPLE_SEED)

# NOTE: the markers are added to the existing map_osm, so re-running this cell
# without re-creating the map stacks another copy of the markers on top.
for _, row in arrest_sample.iterrows():
    add_arrest_to_map(row, map_osm)

map_osm

For this map, I first chose a random sample of 200 arrests from the table. I use two indicators, race and gender: the marker color shows the race and the circle color shows the gender. Clicking a marker shows information about the person arrested, and clicking a circle shows the arrest location. Overall, most of the sampled arrests involve people recorded as Black and male, while the other races account for fewer arrests. However, over 63% of Baltimore's population is Black, so a large share of Black arrestees is expected from the city's demographics alone; these arrest counts do not by themselves show that any group is more likely to commit crimes than another. In addition, most of the arrests happened around the center of the city. These are basic observations based on the interactive folium map.
