In [1]:
# !conda install -c conda-forge folium=0.10.0 geopy=1.20.0 --yes
from botocore.client import Config
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
import folium
import ibm_boto3
import json
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import types

In [None]:
# The code was removed by Watson Studio for sharing.

Retrieving Mexico City's colonies data as a dataframe

In [None]:
def __iter__(self):
    """Stand-in iterator hook bound onto the storage stream below; it simply returns 0."""
    return 0


# Fetch the colonies CSV object through the storage client and keep its streaming body.
body = client_c00e74e766a148bf886ec060f0b9c13a.get_object(
    Bucket='fashionproject-donotdelete-pr-mv2ccdtjv6sbh4',
    Key='colonias_cdmx_2019.csv',
)['Body']

# pandas only accepts the body as a file-like object when it exposes __iter__, so attach one if missing.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

cdmx_raw_df = pd.read_csv(body, sep=";")

In Mexico, neighborhoods are known as "colonies"

In [None]:
# Column types first, then the dimensions of the raw colonies frame.
print("cdmx_raw_df.dtypes:", cdmx_raw_df.dtypes, sep="\n")
print("cdmx_raw_df.shape:", cdmx_raw_df.shape)

# Preview of the first rows (rendered as the cell output).
cdmx_raw_df.head()

Displaying summary statistics for cdmx_raw_df

In [None]:
# include="all" asks describe() to summarise every column, not only the numeric ones.
cdmx_raw_df.describe(include="all")

Counting missing data per column of interest

In [None]:
# Boolean frame: True wherever the raw colonies frame has a missing value.
cdmx_missing_data_df = cdmx_raw_df.isna()

# For each column of interest, print a heading followed by the True/False tally.
for n_heading, n_column in (("COLONIA:", "COLONIA"), ("Geo Point", "Geo Point"), ("ALCALDIA:", "ALCALDIA")):
    print(n_heading)
    print(cdmx_missing_data_df[n_column].value_counts())

Showing the rows with missing data

In [None]:
# Boolean-mask selection: keeps the rows of the raw frame whose "Geo Point" is flagged as missing.
cdmx_raw_df[cdmx_missing_data_df["Geo Point"]]

Cleaning and shaping cdmx_raw_df

In [None]:
# Removing columns of no interest
cdmx_clean_df = cdmx_raw_df.drop(columns=["ENTIDAD", "Geo Shape", "CVE_ALC", "CVE_COL", "SECC_COM", "SECC_PAR"])

# Creating a dictionary of translations from spanish to english for the column labels
colonies_column_labels_translations_es_en_dict = {
    "COLONIA": "Colony",
    "ALCALDIA": "Borough"
}

# Translating columns
cdmx_clean_df = cdmx_clean_df.rename(columns=colonies_column_labels_translations_es_en_dict)

# Splitting the "latitude,longitude" Geo Point string into one column per piece.
# expand=True returns a DataFrame; unpacking the `.str` accessor by iteration (the previous approach)
# is deprecated and no longer supported by current pandas releases.
# Rows with a missing Geo Point stay missing in both pieces.
cdmx_geo_point_parts_df = cdmx_clean_df["Geo Point"].str.split(",", expand=True)

# Casting Latitude and Longitude as floats
cdmx_clean_df["Colony Latitude"] = cdmx_geo_point_parts_df[0].astype("float")
cdmx_clean_df["Colony Longitude"] = cdmx_geo_point_parts_df[1].astype("float")

# Removing the Geo Point column
cdmx_clean_df = cdmx_clean_df.drop(columns="Geo Point")

# Rearranging the columns order
cdmx_clean_df = cdmx_clean_df.reindex(columns=["Borough", "Colony", "Colony Latitude", "Colony Longitude"])

# Since there are just 4 rows with a missing geo point, it is possible to retrieve them manually by using Google maps.
# Keys are row positions; [2, 3] are the positions of the "Colony Latitude" / "Colony Longitude" columns
# after the reindex above.
cdmx_manual_geo_points = {
    1037: [19.4552146, -99.1288375],
    1502: [19.182184, -99.081325],
    1619: [19.2471687, -99.0214895],
    1638: [19.273632, -99.1694116],
}
for n_row_position, n_geo_point in cdmx_manual_geo_points.items():
    cdmx_clean_df.iloc[n_row_position, [2, 3]] = n_geo_point

print("cdmx_clean_df.dtypes:")
print(cdmx_clean_df.dtypes)

print("cdmx_clean_df.shape:", cdmx_clean_df.shape)

cdmx_clean_df.head()

Counting the number of colonies per borough

In [None]:
# count() tallies the non-missing "Colony" values within each borough.
print(cdmx_clean_df[["Borough", "Colony"]].groupby(["Borough"]).count())

Retrieving Cuauhtemoc's colonies

In [None]:
# Keep only the rows of the CUAUHTEMOC borough and renumber them from 0.
cuauhtemoc_colonies_df = (
    cdmx_clean_df
    .loc[cdmx_clean_df["Borough"].eq("CUAUHTEMOC")]
    .reset_index(drop=True)
)

print("cuauhtemoc_colonies_df.shape:", cuauhtemoc_colonies_df.shape, sep="\n")

cuauhtemoc_colonies_df.head()

Defining Mexico City's location

In [None]:
# Resolve the city's coordinates through the Nominatim geocoder.
cdmx_address = "Mexico City, CDMX"
cdmx_geolocator = Nominatim(user_agent="cdmx_explorer")
cdmx_location = cdmx_geolocator.geocode(cdmx_address)
print("cdmx_location:", cdmx_location)

# Latitude and longitude are read off the geocoding result and echoed one by one.
cdmx_latitude = cdmx_location.latitude
print("cdmx_latitude:", cdmx_latitude)
cdmx_longitude = cdmx_location.longitude
print("cdmx_longitude:", cdmx_longitude)

Creating a map of Mexico City, showing Cuauhtemoc's colonies

In [None]:
cdmx_map = folium.Map(location=[cdmx_latitude, cdmx_longitude], zoom_start=14)

# Appearance shared by every colony marker.
cdmx_colony_marker_style = dict(
    radius=5,
    color="blue",
    fill=True,
    fill_color="#3186cc",
    fill_opacity=0.7,
    parse_html=True,
)

# One circle marker per colony; the popup reads "<borough>, <colony>".
cdmx_colony_marker_columns = ["Borough", "Colony", "Colony Latitude", "Colony Longitude"]
for n_borough, n_colony, n_latitude, n_longitude in cuauhtemoc_colonies_df[cdmx_colony_marker_columns].itertuples(index=False, name=None):
    n_label = folium.Popup("{0}, {1}".format(n_borough, n_colony), parse_html=True)
    n_marker = folium.CircleMarker([n_latitude, n_longitude], popup=n_label, **cdmx_colony_marker_style)
    n_marker.add_to(cdmx_map)

cdmx_map

![alt text](https://raw.githubusercontent.com/iSaaC92G/Crime-Analysis-of-Mexico-City/master/images/outputs/cuauhtemoc_colonies.jpg "Cuauhtemoc Colonies")

Defining Foursquare credentials (CLIENT_ID, CLIENT_SECRET, VERSION). Hiding sensitive data

In [None]:
# The code was removed by Watson Studio for sharing.

Testing the Foursquare Get Venue Recommendations endpoint with the first row of the Cuauhtemoc's DataFrame

In [None]:
GET_VENUE_RECOMMENDATIONS_URL = "https://api.foursquare.com/v2/venues/explore"


def get_foursquare_endpoint_params(client_id, client_secret, version, latitude, longitude, radius=200, limit=10):
    """
    Build the query-parameter dictionary sent to the Foursquare Get Venue Recommendations endpoint.

    The credentials and version are passed through as `client_id`, `client_secret` and `v`; the
    coordinates are joined into a single "latitude,longitude" string under `ll`; `radius` and
    `limit` are passed through unchanged.
    """
    endpoint_params = {
        "client_id": client_id,
        "client_secret": client_secret,
        "v": version,
    }
    endpoint_params["ll"] = ",".join([str(latitude), str(longitude)])
    endpoint_params["radius"] = radius
    endpoint_params["limit"] = limit
    return endpoint_params

# Query parameters for the first colony (row label 0) of cuauhtemoc_colonies_df
params = get_foursquare_endpoint_params(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    version=VERSION,
    latitude=cuauhtemoc_colonies_df.loc[0, "Colony Latitude"],
    longitude=cuauhtemoc_colonies_df.loc[0, "Colony Longitude"],
)

# Single GET request against the venue recommendations endpoint
response = requests.get(url=GET_VENUE_RECOMMENDATIONS_URL, params=params)

# Response body decoded from JSON text into Python objects
data = json.loads(response.text)

Analyzing the response data

In [None]:
# Dump the whole decoded response under a heading.
print("data:", data, sep="\n")

# The venue entries sit under response -> groups[0] -> items; show the first one.
data["response"]["groups"][0]["items"][0]

Defining a function to retrieve recommended venues around given locations

In [None]:
def get_recommended_nearby_venues(names, latitudes, longitudes, radius=200, limit=10):
    """
    Return a DataFrame of recommended venues around each given location.

    Iterates over 3 parallel iterables (names, latitudes, longitudes; they are zipped, so they should
    have the same size) and makes one request to the Foursquare Get Venue Recommendations endpoint
    per location.

    Parameters:
        names: location (colony) names, copied into the "Colony" column.
        latitudes, longitudes: coordinates of each location.
        radius, limit: forwarded to get_foursquare_endpoint_params.

    Returns:
        DataFrame with the columns "Colony", "Colony Latitude", "Colony Longitude", "Venue",
        "Venue Latitude", "Venue Longitude" and "Venue Category", one row per venue. The columns
        are present even when no venue (or no location) was found. A venue without any category
        gets a missing "Venue Category".

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
        requests.Timeout: when the endpoint does not answer in time.
    """
    venue_columns = ["Colony", "Colony Latitude", "Colony Longitude", "Venue", "Venue Latitude", "Venue Longitude", "Venue Category"]
    venue_rows = []
    for n_name, n_latitude, n_longitude in zip(names, latitudes, longitudes):
        n_params = get_foursquare_endpoint_params(CLIENT_ID, CLIENT_SECRET, VERSION, n_latitude, n_longitude, radius, limit)
        # A timeout keeps one stalled request from hanging the whole loop.
        n_response = requests.get(url=GET_VENUE_RECOMMENDATIONS_URL, params=n_params, timeout=30)
        # Fail with the HTTP error itself rather than a KeyError on the payload further down.
        n_response.raise_for_status()
        n_data = n_response.json()
        for n_item in n_data["response"]["groups"][0]["items"]:
            n_venue = n_item["venue"]
            # Guard against venues that come back with an empty category list.
            n_categories = n_venue.get("categories") or []
            venue_rows.append((
                n_name,
                n_latitude,
                n_longitude,
                n_venue["name"],
                n_venue["location"]["lat"],
                n_venue["location"]["lng"],
                n_categories[0]["name"] if n_categories else None
            ))
    # Passing the columns to the constructor also works for zero rows; assigning `.columns`
    # afterwards raised a length-mismatch ValueError on an empty frame.
    return pd.DataFrame(venue_rows, columns=venue_columns)

Retrieving the top ten recommended venues around each of Cuauhtemoc's colonies within a radius of 200 meters

In [None]:
# One endpoint request per Cuauhtemoc colony, searching 200 m around its coordinates.
cuauhtemoc_venues_df = get_recommended_nearby_venues(
    names=cuauhtemoc_colonies_df["Colony"],
    latitudes=cuauhtemoc_colonies_df["Colony Latitude"],
    longitudes=cuauhtemoc_colonies_df["Colony Longitude"],
    radius=200,
)

print("cuauhtemoc_venues_df.shape:", cuauhtemoc_venues_df.shape)

cuauhtemoc_venues_df.head()

Counting venues per Colony

In [None]:
# Number of non-missing venue names recorded for each colony.
number_venues_per_colony_df = cuauhtemoc_venues_df.groupby("Colony")[["Venue"]].count()

number_venues_per_colony_df

Showing the distribution of the number of venues per colony

In [None]:
# Histogram of the per-colony venue counts, drawn through the pyplot interface.
# Alternative pandas form: number_venues_per_colony_df[["Venue"]].plot(kind="hist")

plt.hist(number_venues_per_colony_df["Venue"])

# Axis labels: x is the venue count, y is how many colonies share that count.
plt.xlabel("Number of venues")
plt.ylabel("Number of colonies per number of venues")

Showing the colonies with (at least) 10 points of interest

In [None]:
# Keeps the colonies whose venue count is at least 10 (10 is also the default `limit` used for the requests).
number_venues_per_colony_df[number_venues_per_colony_df["Venue"] >= 10]

Creating a map of Mexico City, showing Cuauhtemoc's venues

In [None]:
cuauhtemoc_venues_map = folium.Map(location=[cdmx_latitude, cdmx_longitude], zoom_start=14)

cvm_cuauhtemoc_colonies_srs = cuauhtemoc_colonies_df["Colony"]

# One colour per colony row, sampled along the hsv colormap.
cvm_colors_array = cm.hsv(np.linspace(0, 1, len(cvm_cuauhtemoc_colonies_srs)))
cvm_circle_marker_colors = [colors.rgb2hex(i) for i in cvm_colors_array]

# Colony -> colour lookup, built once instead of re-scanning the Series with a boolean mask
# twice per marker. Pairs are visited in reverse so that, for a repeated colony name, the first
# occurrence wins (the same colour the `.index[0]` lookup used to pick).
cvm_color_by_colony = {
    colony: cvm_circle_marker_colors[index_label]
    for index_label, colony in reversed(list(zip(cvm_cuauhtemoc_colonies_srs.index, cvm_cuauhtemoc_colonies_srs)))
}

# One circle marker per venue, coloured after the colony it was retrieved for.
for n_colony, n_venue, n_latitude, n_longitude in zip(cuauhtemoc_venues_df["Colony"], cuauhtemoc_venues_df["Venue"], cuauhtemoc_venues_df["Venue Latitude"], cuauhtemoc_venues_df["Venue Longitude"]):
    n_label = folium.Popup("{}, {}".format(n_colony, n_venue), parse_html=True)
    n_color = cvm_color_by_colony[n_colony]
    folium.CircleMarker(
        [n_latitude, n_longitude],
        radius=5,
        popup=n_label,
        color=n_color,
        fill=True,
        fill_color=n_color,
        fill_opacity=0.7,
        parse_html=True
    ).add_to(cuauhtemoc_venues_map)

cuauhtemoc_venues_map

![alt text](https://raw.githubusercontent.com/iSaaC92G/Crime-Analysis-of-Mexico-City/master/images/outputs/top_ten_venues_per_colonies.jpg "Top Ten Venues per Colonies")

Retrieving Cuauhtemoc's 2018 crime data set as a DataFrame

In [None]:
# Fetch the 2018 Cuauhtemoc crime CSV object through the storage client and keep its streaming body.
body = client_c00e74e766a148bf886ec060f0b9c13a.get_object(
    Bucket='fashionproject-donotdelete-pr-mv2ccdtjv6sbh4',
    Key='carpetas_de_investigacion_pgj_cdmx_cuauhtemoc_2018.csv',
)['Body']

# pandas only accepts the body as a file-like object when it exposes __iter__, so attach one if missing.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

cuauhtemoc_raw_crime_df = pd.read_csv(body, sep=";")

In [None]:
# Column types first, then the dimensions of the raw crime frame.
print("cuauhtemoc_raw_crime_df.dtypes:", cuauhtemoc_raw_crime_df.dtypes, sep="\n")
print("cuauhtemoc_raw_crime_df.shape:", cuauhtemoc_raw_crime_df.shape)

# Preview of the first rows (rendered as the cell output).
cuauhtemoc_raw_crime_df.head()

Displaying summary statistics for cuauhtemoc_raw_crime_df

In [None]:
# include="all" asks describe() to summarise every column, not only the numeric ones.
cuauhtemoc_raw_crime_df.describe(include="all")

Counting missing data per column of interest

In [None]:
# Boolean frame: True wherever the raw crime frame has a missing value.
crime_missing_data_df = cuauhtemoc_raw_crime_df.isna()

# For each column of interest, print "<column>:" followed by the True/False tally.
for n_column in ("delito", "categoria_delito", "colonia_hechos"):
    print(n_column + ":")
    print(crime_missing_data_df[n_column].value_counts())

Showing the rows with colony missing data

In [None]:
# Boolean-mask selection: first rows of the raw crime frame whose "colonia_hechos" is flagged as missing.
cuauhtemoc_raw_crime_df[crime_missing_data_df["colonia_hechos"]].head()

Cleaning and shaping cuauhtemoc_raw_crime_df

In [None]:
# Removing columns of no interest
cuauhtemoc_clean_crime_df = cuauhtemoc_raw_crime_df.drop(["año_hechos", "mes_hechos", "fiscalía", "agencia", "unidad_investigacion", "alcaldia_hechos", "fecha_inicio", "mes_inicio", "ao_inicio", "calle_hechos", "calle_hechos2", "Geopoint"], axis=1)

# Creating a dictionary of translations from spanish to english for the column labels
crimes_column_labels_translations_es_en_dict = {
    "fecha_hechos": "Crime Date",
    "delito": "Crime",
    "categoria_delito": "Crime Category",
    "colonia_hechos": "Crime Colony",
    "longitud": "Crime Longitude",
    "latitud": "Crime Latitude"
}

# Translating columns
cuauhtemoc_clean_crime_df = cuauhtemoc_clean_crime_df.rename(columns=crimes_column_labels_translations_es_en_dict)

# Removing the rows with missing colony
cuauhtemoc_clean_crime_df = cuauhtemoc_clean_crime_df[crime_missing_data_df["colonia_hechos"] == False]

print("cuauhtemoc_clean_crime_df.dtypes:")
print(cuauhtemoc_clean_crime_df.dtypes)

print("cuauhtemoc_clean_crime_df.shape:")
print(cuauhtemoc_clean_crime_df.shape)

cuauhtemoc_clean_crime_df.head()

Counting the number of unique Crime categories, Crimes and Crime colonies

In [None]:
cuauhtemoc_crime_columns_df = cuauhtemoc_clean_crime_df[["Crime Category", "Crime", "Crime Colony"]]

# For each grouping column: how many distinct values it has, then a per-value count of another column.
print("Number of unique Crime categories:", cuauhtemoc_crime_columns_df["Crime Category"].nunique())
print(cuauhtemoc_crime_columns_df.groupby("Crime Category")[["Crime"]].count())

print("Number of unique Crimes:", cuauhtemoc_crime_columns_df["Crime"].nunique())
print(cuauhtemoc_crime_columns_df.groupby("Crime")[["Crime Colony"]].count())

print("Number of unique Crime colonies:", cuauhtemoc_crime_columns_df["Crime Colony"].nunique())
print(cuauhtemoc_crime_columns_df.groupby("Crime Colony")[["Crime"]].count())

Removing unwanted data

In [None]:
# These categories are not considered a threat to tourists, so their rows are filtered out
cuauhtemoc_crime_df = cuauhtemoc_clean_crime_df[
    ~cuauhtemoc_clean_crime_df["Crime Category"].isin([
        "HECHO NO DELICTIVO",
        "ROBO A REPARTIDOR CON Y SIN VIOLENCIA",
        "ROBO A TRANSPORTISTA CON Y SIN VIOLENCIA",
    ])
]

# Same summary as before the filter: distinct-value count, then a per-value row count.
print("Number of unique Crime categories:", cuauhtemoc_crime_df["Crime Category"].nunique())
print(cuauhtemoc_crime_df.groupby("Crime Category")[["Crime"]].count())

print("Number of unique Crimes:", cuauhtemoc_crime_df["Crime"].nunique())
print(cuauhtemoc_crime_df.groupby("Crime")[["Crime Category"]].count())

print("Number of unique Crime colonies:", cuauhtemoc_crime_df["Crime Colony"].nunique())
print(cuauhtemoc_crime_df.groupby("Crime Colony")[["Crime Category"]].count())

print("cuauhtemoc_crime_df.shape:", cuauhtemoc_crime_df.shape, sep="\n")

cuauhtemoc_crime_df.head()

Transforming the cuauhtemoc_crime_df DataFrame to set Crime categories as columns

In [None]:
# Indicator columns (one per crime category) placed to the right of the colony of each crime row.
cuauhtemoc_crime_categories_df = pd.concat(
    [
        cuauhtemoc_crime_df[["Crime Colony"]],
        pd.get_dummies(cuauhtemoc_crime_df[["Crime Category"]], prefix="", prefix_sep=""),
    ],
    axis=1,
)

print("cuauhtemoc_crime_categories_df.shape:", cuauhtemoc_crime_categories_df.shape, sep="\n")

cuauhtemoc_crime_categories_df.head()

Grouping rows by Crime Colony and calculating the mean for each Crime category

In [None]:
# Mean of every category indicator per colony; as_index=False keeps "Crime Colony" as a regular column.
cuauhtemoc_crime_grouped_by_colony_df = cuauhtemoc_crime_categories_df.groupby("Crime Colony", as_index=False).mean()

print("cuauhtemoc_crime_grouped_by_colony_df.shape:", cuauhtemoc_crime_grouped_by_colony_df.shape, sep="\n")

cuauhtemoc_crime_grouped_by_colony_df

Fitting the KMeans model

In [None]:
# Number of clusters requested from KMeans
k_clusters = 7

# Feature matrix: every column except the colony name.
# `columns=` replaces the positional axis argument (`drop("Crime Colony", 1)`), which is
# no longer accepted by current pandas releases.
cuauhtemoc_crime_grouped_to_cluster_df = cuauhtemoc_crime_grouped_by_colony_df.drop(columns="Crime Colony")

# n_init is pinned explicitly so the labels do not shift with the library default; later cells
# select clusters by hard-coded label (1, 3, 4). random_state=0 fixes the initialisation seed.
kmeans_model = KMeans(n_clusters=k_clusters, n_init=10, random_state=0).fit(cuauhtemoc_crime_grouped_to_cluster_df)

kmeans_model.labels_

Creating a dictionary of labeled colonies

In [None]:
# Pair every colony name found in the crime data with the cluster label KMeans assigned to it.
cuauhtemoc_labeled_venues_df = pd.DataFrame(data={"Colony": cuauhtemoc_crime_grouped_by_colony_df["Crime Colony"], "Cluster Label": kmeans_model.labels_})

# Ordered substring replacements applied to the colony names (presumably to match the spelling used
# in the colonies data set). Order matters: the "EX-HIPÓDROMO ..." entry must run before the generic
# "HIPÓDROMO" one. The "HIPÓDROMO CONDESA" entry is kept for fidelity, although the generic
# "HIPÓDROMO" replacement before it already rewrites that name.
for n_old_spelling, n_new_spelling in [
    ("AMPLIACIÓN ASTURIAS", "ASTURIAS (AMPL)"),
    ("CUAUHTÉMOC", "CUAUHTEMOC"),
    ("EX-HIPÓDROMO DE PERALVILLO", "EX HIPODROMO DE PERALVILLO"),
    ("HIPÓDROMO", "HIPODROMO"),
    ("HIPÓDROMO CONDESA", "HIPODROMO CONDESA"),
    ("JUÁREZ", "JUAREZ"),
    ("NONOALCO TLATELOLCO", "NONOALCO-TLATELOLCO"),
    ("SAN SIMÓN TOLNAHUAC", "SAN SIMON TOLNAHUAC"),
    ("VALLE GÓMEZ", "VALLE GOMEZ"),
]:
    cuauhtemoc_labeled_venues_df["Colony"] = cuauhtemoc_labeled_venues_df["Colony"].apply(
        lambda x, old=n_old_spelling, new=n_new_spelling: x.replace(old, new)
    )

# Colony name -> cluster label lookup.
cuauhtemoc_labeled_venues_dict = dict(zip(
    cuauhtemoc_labeled_venues_df["Colony"].tolist(),
    cuauhtemoc_labeled_venues_df["Cluster Label"].tolist(),
))

cuauhtemoc_labeled_venues_dict

Adding the KMeans labels to the Cuauhtemoc venues dataframe

In [None]:
# Work on a copy, so that adding the "Cluster Label" column does not also modify cuauhtemoc_venues_df.
cuauhtemoc_venues_labeled_colonies_df = cuauhtemoc_venues_df.copy()

def get_cluster_label(colony, colony_cluster_label_dict=None):
    """
    Return the cluster label of the dictionary key that best matches `colony`.

    A key matches when `colony` starts with it. When several keys match, the longest one wins,
    so that e.g. "HIPODROMO CONDESA" is not given the label of the shorter key "HIPODROMO".

    Parameters:
        colony: colony name to look up.
        colony_cluster_label_dict: optional {colony name: cluster label} mapping; defaults to
            cuauhtemoc_labeled_venues_dict. Accepting it keeps this function call-compatible
            with the two-argument definition used for the crime data.

    Returns:
        The matching cluster label, or None when no key is a prefix of `colony`.
    """
    if colony_cluster_label_dict is None:
        colony_cluster_label_dict = cuauhtemoc_labeled_venues_dict
    matching_keys = [key for key in colony_cluster_label_dict if colony.startswith(key)]
    if not matching_keys:
        return None
    return colony_cluster_label_dict[max(matching_keys, key=len)]

# Label every venue with the cluster of its colony, drop the rows left with a missing value
# (e.g. colonies without a matching cluster label) and store the label as int64.
# Built as one chain: assign() returns a new frame, so the source frame is left untouched and
# no column is assigned onto the result of dropna() (which triggered SettingWithCopyWarning).
cuauhtemoc_venues_labeled_colonies_df = (
    cuauhtemoc_venues_labeled_colonies_df
    .assign(**{"Cluster Label": cuauhtemoc_venues_labeled_colonies_df["Colony"].apply(lambda x: get_cluster_label(x))})
    .dropna()
    .astype({"Cluster Label": "int64"})
)

print("cuauhtemoc_venues_labeled_colonies_df.shape:")
print(cuauhtemoc_venues_labeled_colonies_df.shape)

cuauhtemoc_venues_labeled_colonies_df

Creating a map of Mexico City showing Cuauhtemoc's clustered colonies

In [None]:
cuauhtemoc_clusters_map = folium.Map(location=[cdmx_latitude, cdmx_longitude], zoom_start=14)

# One hex colour per cluster label, sampled along the first 80% of the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 0.8, k_clusters))
circle_marker_colors = [colors.rgb2hex(rgba) for rgba in colors_array]

# One circle marker per labelled venue, coloured by the cluster label of its colony.
clusters_map_columns = ["Colony", "Venue", "Venue Latitude", "Venue Longitude", "Cluster Label"]
for n_colony, n_venue, n_latitude, n_longitude, n_cluster_label in cuauhtemoc_venues_labeled_colonies_df[clusters_map_columns].itertuples(index=False, name=None):
    n_label = folium.Popup("Colony: {}, Venue: {}, Cluster: {}".format(n_colony, n_venue, n_cluster_label), parse_html=True)
    n_cluster_color = circle_marker_colors[n_cluster_label]
    n_marker = folium.CircleMarker(
        [n_latitude, n_longitude],
        radius=5,
        popup=n_label,
        color=n_cluster_color,
        fill=True,
        fill_color=n_cluster_color,
        fill_opacity=1,
        parse_html=True
    )
    n_marker.add_to(cuauhtemoc_clusters_map)

cuauhtemoc_clusters_map

![alt text](https://raw.githubusercontent.com/iSaaC92G/Crime-Analysis-of-Mexico-City/master/images/outputs/classified_top_ten_venues_per_colony.jpg "Classified Top Ten Venues per Colony")

Grouping crimes by clusters

In [None]:
# Pair every colony name found in the crime data with the cluster label KMeans assigned to it.
cuauhtemoc_labeled_colonies_df = pd.DataFrame(data={"Colony": cuauhtemoc_crime_grouped_by_colony_df["Crime Colony"], "Cluster Label": kmeans_model.labels_})

# Colony name -> cluster label lookup (names are left exactly as spelled in the crime data).
cuauhtemoc_labeled_colonies_dict = dict(zip(
    cuauhtemoc_labeled_colonies_df["Colony"].tolist(),
    cuauhtemoc_labeled_colonies_df["Cluster Label"].tolist(),
))

print(cuauhtemoc_labeled_colonies_dict)

# Independent copy of the crime frame, to which the cluster labels are added next.
cuauhtemoc_crime_labeled_colonies_df = cuauhtemoc_crime_df.copy()

def get_cluster_label(colony, colony_cluster_label_dict):
    """
    Return the cluster label of the dictionary key that best matches `colony`.

    A key matches when `colony` starts with it. When several keys match, the longest one wins,
    so that e.g. "HIPÓDROMO CONDESA" is not given the label of the shorter key "HIPÓDROMO"
    merely because that key comes first in the dictionary.

    Parameters:
        colony: colony name to look up.
        colony_cluster_label_dict: {colony name: cluster label} mapping.

    Returns:
        The matching cluster label, or None when no key is a prefix of `colony`.
    """
    matching_keys = [key for key in colony_cluster_label_dict if colony.startswith(key)]
    if not matching_keys:
        return None
    return colony_cluster_label_dict[max(matching_keys, key=len)]

# Label every crime row with the cluster of its colony, drop the rows left with a missing value
# and store the label as int64.
# Built as one chain so that no column is assigned onto the result of dropna()
# (which triggered SettingWithCopyWarning).
cuauhtemoc_crime_labeled_colonies_df = (
    cuauhtemoc_crime_labeled_colonies_df
    .assign(**{"Cluster Label": cuauhtemoc_crime_labeled_colonies_df["Crime Colony"].apply(lambda x: get_cluster_label(x, cuauhtemoc_labeled_colonies_dict))})
    .dropna()
    .astype({"Cluster Label": "int64"})
)

print("cuauhtemoc_crime_labeled_colonies_df.shape:")
print(cuauhtemoc_crime_labeled_colonies_df.shape)

cuauhtemoc_crime_labeled_colonies_df.head()

Counting the number of crimes by cluster

In [None]:
# count() tallies the non-missing "Crime Colony" values (i.e. crime rows) within each cluster label.
cuauhtemoc_crime_labeled_colonies_df[["Crime Colony", "Cluster Label"]].groupby("Cluster Label").count()

Cluster's colonies ordered by crime rates

In [None]:
# Crime rows of cluster labels 1, 3 and 4 (the clusters this cell treats as high-crime, per the
# variable name), counted per colony and sorted from the most to the fewest crimes.
cuauhtemoc_high_crime_rates_by_colony_df = (
    cuauhtemoc_crime_labeled_colonies_df
    .loc[
        cuauhtemoc_crime_labeled_colonies_df["Cluster Label"].isin([1, 3, 4]),
        ["Crime Category", "Crime Colony"],
    ]
    .groupby("Crime Colony")
    .count()
    .reset_index()
    .rename(columns={"Crime Category": "Number of Crimes by Colony"})
    .sort_values(by=["Number of Crimes by Colony"], ascending=False)
)

# Displayed with a fresh 0..n-1 index; the stored frame keeps its pre-sort index.
cuauhtemoc_high_crime_rates_by_colony_df.reset_index(drop=True)

Showing the venues found in those clusters

In [None]:
# Venues whose colony carries cluster label 1, 3 or 4; reset_index() keeps the old index as an "index" column.
clusters_with_high_crime_rates_df = (
    cuauhtemoc_venues_labeled_colonies_df
    .loc[cuauhtemoc_venues_labeled_colonies_df["Cluster Label"].isin([1, 3, 4])]
    .reset_index()
)

print("clusters_with_high_crime_rates_df.shape:", clusters_with_high_crime_rates_df.shape)

clusters_with_high_crime_rates_df["Venue"].tolist()