# Route Planning

After building the model that predicts the time slot in which a crime is likely to occur at a location, we use the crime hotspots as the main focus points for patrols. Each hotspot is passed through the time-slot model to predict when crime is likely to happen there, so that police patrol routes visit every hotspot during its predicted time slot(s).

## Import the Libraries

In [137]:
import pandas as pd
import joblib
import pickle
import folium
from folium.plugins import AntPath



In [122]:
%load_ext autoreload
%autoreload 2
import sys; 
import sys; sys.path.insert(0, 'lib/')
from libs import genetic_algo

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [101]:
# Hotspots identified in AREA 1 (central), keyed by hotspot index -> (lat, lon).
hotspots_dict = {
    0: (34.036454545454546, -118.26722727272727),
    1: (34.04208787878788, -118.2468686868687),
    2: (34.05622727272727, -118.2376),
    3: (34.048360606060605, -118.26308181818182),
    4: (34.06184747474747, -118.2468090909091),
    5: (34.06428181818182, -118.23791818181817),
    6: (34.046469696969694, -118.25197272727273),
    7: (34.045980808080806, -118.23947171717171),
    8: (34.04959090909091, -118.24594848484848),
    9: (34.04322222222222, -118.2692888888889),
}
num_hotspots = len(hotspots_dict)
# Flatten to a list ordered by hotspot index, so list position == dict key.
hotspots = [hotspots_dict[idx] for idx in range(num_hotspots)]
print(hotspots)

[(34.036454545454546, -118.26722727272727), (34.04208787878788, -118.2468686868687), (34.05622727272727, -118.2376), (34.048360606060605, -118.26308181818182), (34.06184747474747, -118.2468090909091), (34.06428181818182, -118.23791818181817), (34.046469696969694, -118.25197272727273), (34.045980808080806, -118.23947171717171), (34.04959090909091, -118.24594848484848), (34.04322222222222, -118.2692888888889)]


In [96]:
# Load the clustered AREA 1 data (the preview shows the columns
# AREA, Vict Age, LAT, LON and cluster).
# NOTE(review): read_pickle unpickles the file — only read files from a trusted source.
df_area_1 = pd.read_pickle("clusters/area1_clusters_age.pkl")
# Preview the first rows (rendered as the cell output)
df_area_1.head()

Unnamed: 0,AREA,Vict Age,LAT,LON,cluster
0,1,47,34.0444,-118.2628,9
5,1,25,34.0415,-118.262,9
50,1,22,34.0466,-118.259,3
101,1,59,34.057,-118.2508,4
117,1,36,34.0438,-118.2547,3


In [103]:
# Average the numeric columns per cluster. numeric_only=True keeps this cell
# working when the frame also carries non-numeric columns (such as the
# "age_group" labels grouped on further down), which would otherwise make
# .mean() raise on recent pandas versions.
df_by_cluster = df_area_1.groupby("cluster").mean(numeric_only=True)
# groupby sorts by cluster id, so position k in the list is the k-th cluster id
# in ascending order; each mean victim age is truncated to a whole number.
cluster_mean_age = [int(age) for age in df_by_cluster["Vict Age"]]
print(cluster_mean_age)

[37, 43, 38, 34, 37, 38, 37, 38, 38, 34]


In [26]:
# Count rows per (cluster, age_group) and sort descending by cluster and by the
# "AREA" count, so the most frequent age group comes first within each cluster.
# NOTE(review): this needs an "age_group" column on df_area_1, which the head()
# preview of the loaded frame does not show — confirm where it is added.
age_group_by_cluster = df_area_1.groupby(by=["cluster", "age_group"]).count().sort_values(by=["cluster", "AREA"], ascending=False)
groups = age_group_by_cluster.reset_index()
groups_by_cluster = groups.groupby("cluster")
# Iterate over the clusters that actually exist (ascending id) instead of a
# hard-coded range(10), which raises KeyError if a cluster id is missing.
for i, cluster in groups_by_cluster:
    # Pull the label out first: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python versions before 3.12.
    top_age_group = cluster.iloc[0]["age_group"]
    print(f"for cluster {i}: the top age group is: {top_age_group}")

for cluster 0: the top age group is: Adult
for cluster 1: the top age group is: Middle Age
for cluster 2: the top age group is: Adult
for cluster 3: the top age group is: Adult
for cluster 4: the top age group is: Adult
for cluster 5: the top age group is: Adult
for cluster 6: the top age group is: Adult
for cluster 7: the top age group is: Adult
for cluster 8: the top age group is: Adult
for cluster 9: the top age group is: Adult


In [84]:
# The top age groups found above are adult and middle age.
# Ages from 0 up to (not including) 70, in steps of 5.
age_range_area_1 = list(range(0, 70, 5))
print(age_range_area_1)

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65]


In [106]:
# "Vict Age", "LAT", "LON", "AREA", "Month OCC", "Day OCC", "time_slot"
def get_hotspot_data(cluster_mean_age, lat_long_list, area, days=range(1, 8), months=range(1, 13)):
    """Build one row per (day, month, hotspot) combination.

    Parameters
    ----------
    cluster_mean_age : sequence
        One "Vict Age" value per hotspot, in the same order as ``lat_long_list``.
    lat_long_list : sequence of (lat, lon) pairs
        Hotspot coordinates.
    area : scalar
        Value written to the "AREA" column of every row.
    days : iterable, optional
        Values for the "Day OCC" column (default 1..7).
    months : iterable, optional
        Values for the "Month OCC" column (default 1..12).

    Returns
    -------
    pandas.DataFrame
        Columns "Vict Age", "LAT", "LON", "AREA", "Month OCC", "Day OCC" with a
        fresh 0..n-1 index. Rows are ordered by day, then month, then hotspot.
        The frame is empty (columns only) when there are no hotspots, days or
        months.

    Raises
    ------
    ValueError
        If ``cluster_mean_age`` and ``lat_long_list`` differ in length.
    """
    columns = ["Vict Age", "LAT", "LON", "AREA", "Month OCC", "Day OCC"]
    num_hotspots = len(lat_long_list)
    if len(cluster_mean_age) != num_hotspots:
        raise ValueError(
            f"cluster_mean_age has {len(cluster_mean_age)} entries "
            f"but lat_long_list has {num_hotspots}"
        )
    if num_hotspots == 0:
        # zip(*[]) yields nothing, so the lat/lon unpacking below would fail.
        return pd.DataFrame(columns=columns)

    lat, lon = map(list, zip(*lat_long_list))

    # Collect the per-(day, month) frames and concatenate once at the end:
    # growing a frame inside the loop is quadratic, and seeding it with an
    # empty DataFrame makes the resulting dtypes depend on how pandas treats
    # empty entries in concat.
    frames = [
        pd.DataFrame({
            "Vict Age": cluster_mean_age,
            "LAT": lat,
            "LON": lon,
            "AREA": [area] * num_hotspots,
            "Month OCC": [month] * num_hotspots,
            "Day OCC": [day] * num_hotspots,
        })
        for day in days
        for month in months
    ]
    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)
    


In [107]:
# Build the hotspot/month/day grid for the AREA 1 hotspots (area value 1).
testing_data = get_hotspot_data(
    cluster_mean_age=cluster_mean_age,
    lat_long_list=hotspots,
    area=1,
)
print(testing_data.shape)
# Preview the first rows (rendered as the cell output)
testing_data.head()


(840, 6)


Unnamed: 0,Vict Age,LAT,LON,AREA,Month OCC,Day OCC
0,37,34.036455,-118.267227,1,1,1
1,43,34.042088,-118.246869,1,1,1
2,38,34.056227,-118.2376,1,1,1
3,34,34.048361,-118.263082,1,1,1
4,37,34.061847,-118.246809,1,1,1


In [108]:
# NOTE(review): joblib.load unpickles the file — only load model files from a trusted source.
kmeans = joblib.load("clusters/kmeans_without_hr_model.pkl")

# Predict from an explicit column list (the six columns get_hotspot_data
# produces) rather than the whole frame. Once "cluster" has been added below,
# passing the whole frame on a re-run would hand the model an extra column.
kmeans_features = ["Vict Age", "LAT", "LON", "AREA", "Month OCC", "Day OCC"]
clusters = kmeans.predict(testing_data[kmeans_features])
testing_data["cluster"] = clusters
print(len(clusters))

840


In [109]:

# Preview the rows now that the "cluster" column has been added
testing_data.head()

Unnamed: 0,Vict Age,LAT,LON,AREA,Month OCC,Day OCC,cluster
0,37,34.036455,-118.267227,1,1,1,2
1,43,34.042088,-118.246869,1,1,1,11
2,38,34.056227,-118.2376,1,1,1,2
3,34,34.048361,-118.263082,1,1,1,2
4,37,34.061847,-118.246809,1,1,1,2


In [61]:
# Load the fitted time-slot model (a scikit-learn Pipeline, per the printed repr).
# NOTE(review): pickle.load can execute arbitrary code — only load model files from a trusted source.
with open('models/best_model_clustering.pkl', 'rb') as file:
    time_slot_model = pickle.load(file)

# Show the pipeline steps and their hyper-parameters
print(time_slot_model)

Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 GradientBoostingClassifier(max_depth=7,
                                            min_weight_fraction_leaf=0.01,
                                            n_estimators=50,
                                            random_state=42))])


In [112]:
# Columns fed to the time-slot model; month and day are one-hot encoded below.
features = ['Month OCC', 'Day OCC', 'LAT', 'LON', "cluster"]
test_X = testing_data.loc[:, features]
# NOTE(review): the dummy columns are derived from the values present in test_X;
# they presumably have to match the columns the model was trained on — confirm.
X = pd.get_dummies(
    data=test_X,
    columns=['Month OCC', "Day OCC"],
    drop_first=True,
)
time_slots = time_slot_model.predict(X)

In [113]:
# Attach the predicted time slot to every hotspot/month/day row (adds the column in place)
testing_data["time_slot"] = time_slots
print(testing_data.shape)
# Preview the first rows (rendered as the cell output)
testing_data.head()


(840, 8)


Unnamed: 0,Vict Age,LAT,LON,AREA,Month OCC,Day OCC,cluster,time_slot
0,37,34.036455,-118.267227,1,1,1,2,1
1,43,34.042088,-118.246869,1,1,1,11,2
2,38,34.056227,-118.2376,1,1,1,2,1
3,34,34.048361,-118.263082,1,1,1,2,1
4,37,34.061847,-118.246809,1,1,1,2,1


In [125]:
# Bucket every hotspot by the time slots (0, 1, 2) predicted for it across all
# of its month/day rows. A hotspot can land in more than one bucket.
whole_set = [[], [], []]
for lat, lon in hotspots:
    at_hs = testing_data[(testing_data["LAT"] == lat) & (testing_data["LON"] == lon)]
    # Deliberately not named `time_slots`: that global holds the model's
    # predictions, and overwriting it here would break a re-run of the cell
    # that assigns testing_data["time_slot"].
    slots_at_hs = set(at_hs["time_slot"].unique().tolist())
    for slot, members in enumerate(whole_set):
        if slot in slots_at_hs:
            members.append((lat, lon))

# Keep the per-slot names that the route-planning cells use.
set_0, set_1, set_2 = whole_set

print(f"Number of locations to visit in slot 0: {len(set_0)}")
print(f"Number of locations to visit in slot 1: {len(set_1)}")
print(f"Number of locations to visit in slot 2: {len(set_2)}")

print(whole_set)

Number of locations to visit in slot 0: 2
Number of locations to visit in slot 1: 9
Number of locations to visit in slot 2: 8
[[(34.048360606060605, -118.26308181818182), (34.04322222222222, -118.2692888888889)], [(34.036454545454546, -118.26722727272727), (34.05622727272727, -118.2376), (34.048360606060605, -118.26308181818182), (34.06184747474747, -118.2468090909091), (34.06428181818182, -118.23791818181817), (34.046469696969694, -118.25197272727273), (34.045980808080806, -118.23947171717171), (34.04959090909091, -118.24594848484848), (34.04322222222222, -118.2692888888889)], [(34.036454545454546, -118.26722727272727), (34.04208787878788, -118.2468686868687), (34.05622727272727, -118.2376), (34.06184747474747, -118.2468090909091), (34.06428181818182, -118.23791818181817), (34.046469696969694, -118.25197272727273), (34.045980808080806, -118.23947171717171), (34.04959090909091, -118.24594848484848)]]


In [134]:
# Marker colour per time slot (indexed by slot number); loop-invariant, so defined once.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'darkblue', 'cadetblue', 'darkpurple']

# Save one HTML map per time slot, centred on the mean coordinate of its hotspots.
for i, slot_set in enumerate(whole_set):
    if not slot_set:
        # With no hotspots there is no mean coordinate to centre the map on
        # (means would be an empty tuple and means[0] would raise IndexError).
        print(f"No locations in slot {i}; skipping map")
        continue
    means = tuple(sum(x) / len(slot_set) for x in zip(*slot_set))
    print(means)
    m = folium.Map(location=[means[0], means[1]], zoom_start=15)
    # Add points to the map
    for lat, lon in slot_set:
        print((lat, lon))
        folium.CircleMarker(
            location=(lat, lon),
            radius=8,
            color=colors[i],
            fill=True,
            fill_color=colors[i],
            fill_opacity=0.6,
            popup=f"{lat}, {lon} in Slot {i}"
        ).add_to(m)
    print(len(slot_set))
    m.save(f'slots/locations_slot_{i}.html')

(34.04579141414141, -118.26618535353535)
(34.048360606060605, -118.26308181818182)
(34.04322222222222, -118.2692888888889)
2
(34.05027059483726, -118.25103535353534)
(34.036454545454546, -118.26722727272727)
(34.05622727272727, -118.2376)
(34.048360606060605, -118.26308181818182)
(34.06184747474747, -118.2468090909091)
(34.06428181818182, -118.23791818181817)
(34.046469696969694, -118.25197272727273)
(34.045980808080806, -118.23947171717171)
(34.04959090909091, -118.24594848484848)
(34.04322222222222, -118.2692888888889)
9
(34.05036755050505, -118.24672702020202)
(34.036454545454546, -118.26722727272727)
(34.04208787878788, -118.2468686868687)
(34.05622727272727, -118.2376)
(34.06184747474747, -118.2468090909091)
(34.06428181818182, -118.23791818181817)
(34.046469696969694, -118.25197272727273)
(34.045980808080806, -118.23947171717171)
(34.04959090909091, -118.24594848484848)
8


## Planning the Route for Time Slot 1

In [135]:
# Run the genetic algorithm over the slot-1 hotspots.
# best_route holds positions into set_1 (the next cell uses it to index set_1).
# NOTE(review): the search is presumably randomised — consider fixing a seed so
# the route is reproducible; confirm in genetic_algo.
best_route, best_distance = genetic_algo.genetic_algorithm(set_1)
print("Best Route:", best_route)
print("Best Route Distance:", best_distance)

Generation 0, Best Distance: 11.67 km
Generation 10, Best Distance: 10.76 km
Generation 20, Best Distance: 8.73 km
Generation 30, Best Distance: 8.73 km
Generation 40, Best Distance: 8.73 km
Generation 50, Best Distance: 8.73 km
Generation 60, Best Distance: 8.73 km
Generation 70, Best Distance: 8.73 km
Generation 80, Best Distance: 8.73 km
Generation 90, Best Distance: 8.73 km
Generation 100, Best Distance: 8.73 km
Generation 110, Best Distance: 8.73 km
Generation 120, Best Distance: 8.73 km
Generation 130, Best Distance: 8.73 km
Generation 140, Best Distance: 8.73 km
Generation 150, Best Distance: 8.73 km
Generation 160, Best Distance: 8.73 km
Generation 170, Best Distance: 8.73 km
Generation 180, Best Distance: 8.73 km
Generation 190, Best Distance: 8.73 km
Generation 200, Best Distance: 8.73 km
Generation 210, Best Distance: 8.73 km
Generation 220, Best Distance: 8.73 km
Generation 230, Best Distance: 8.73 km
Generation 240, Best Distance: 8.73 km
Generation 250, Best Distance: 8.7

In [139]:
# Translate the visiting order (positions into set_1) into coordinates.
route = [set_1[position] for position in best_route]
# List the stops with 1-based numbering.
for stop_number, place in enumerate(route, start=1):
    print(f"Location: {stop_number}: {place}")

Location: 1: (34.04322222222222, -118.2692888888889)
Location: 2: (34.036454545454546, -118.26722727272727)
Location: 3: (34.048360606060605, -118.26308181818182)
Location: 4: (34.046469696969694, -118.25197272727273)
Location: 5: (34.04959090909091, -118.24594848484848)
Location: 6: (34.045980808080806, -118.23947171717171)
Location: 7: (34.06184747474747, -118.2468090909091)
Location: 8: (34.06428181818182, -118.23791818181817)
Location: 9: (34.05622727272727, -118.2376)


In [138]:
# Centre the map on the first stop of the route
map_center = route[0]
m = folium.Map(location=map_center, zoom_start=14)

# Draw the route as an AntPath line through the stops in visiting order
route_line = AntPath(route, color="blue", weight=2.5, opacity=1)
route_line.add_to(m)

# One marker per stop; the popup shows the stop's 1-based sequence number
for stop_number, stop in enumerate(route, start=1):
    lat, lon = stop
    marker = folium.Marker(
        location=(lat, lon),
        popup=f"Point {stop_number}",
        icon=folium.Icon(color="blue", icon="info-sign"),
    )
    marker.add_to(m)
# Last expression: the notebook renders the map inline
m

## Planning Routes for Time Slot 2

In [144]:
# Run the genetic algorithm over the slot-2 hotspots.
# best_route holds positions into set_2 (the next cell uses it to index set_2).
# NOTE(review): the search is presumably randomised — consider fixing a seed so
# the route is reproducible; confirm in genetic_algo.
best_route, best_distance = genetic_algo.genetic_algorithm(set_2)
print("Best Route:", best_route)
print("Best Route Distance:", best_distance)

Generation 0, Best Distance: 8.29 km
Generation 10, Best Distance: 8.29 km
Generation 20, Best Distance: 7.72 km
Generation 30, Best Distance: 7.68 km
Generation 40, Best Distance: 7.68 km
Generation 50, Best Distance: 7.68 km
Generation 60, Best Distance: 7.68 km
Generation 70, Best Distance: 7.68 km
Generation 80, Best Distance: 7.68 km
Generation 90, Best Distance: 7.68 km
Generation 100, Best Distance: 7.68 km
Generation 110, Best Distance: 7.68 km
Generation 120, Best Distance: 7.68 km
Generation 130, Best Distance: 7.68 km
Generation 140, Best Distance: 7.54 km
Generation 150, Best Distance: 7.54 km
Generation 160, Best Distance: 7.54 km
Generation 170, Best Distance: 7.54 km
Generation 180, Best Distance: 7.54 km
Generation 190, Best Distance: 7.54 km
Generation 200, Best Distance: 7.54 km
Generation 210, Best Distance: 7.54 km
Generation 220, Best Distance: 7.54 km
Generation 230, Best Distance: 7.54 km
Generation 240, Best Distance: 7.54 km
Generation 250, Best Distance: 7.54 

In [145]:
# Translate the visiting order (positions into set_2) into coordinates.
route = [set_2[position] for position in best_route]
# List the stops with 1-based numbering.
for stop_number, place in enumerate(route, start=1):
    print(f"Location: {stop_number}: {place}")

Location: 1: (34.036454545454546, -118.26722727272727)
Location: 2: (34.046469696969694, -118.25197272727273)
Location: 3: (34.04208787878788, -118.2468686868687)
Location: 4: (34.045980808080806, -118.23947171717171)
Location: 5: (34.04959090909091, -118.24594848484848)
Location: 6: (34.05622727272727, -118.2376)
Location: 7: (34.06428181818182, -118.23791818181817)
Location: 8: (34.06184747474747, -118.2468090909091)


In [146]:
# Centre the map on the first stop of the route
map_center = route[0]
m = folium.Map(location=map_center, zoom_start=14)

# Draw the route as an AntPath line through the stops in visiting order
route_line = AntPath(route, color="blue", weight=2.5, opacity=1)
route_line.add_to(m)

# One marker per stop; the popup shows the stop's 1-based sequence number
for stop_number, stop in enumerate(route, start=1):
    lat, lon = stop
    marker = folium.Marker(
        location=(lat, lon),
        popup=f"Point {stop_number}",
        icon=folium.Icon(color="blue", icon="info-sign"),
    )
    marker.add_to(m)
# Last expression: the notebook renders the map inline
m