In [1]:
import os
import time
from tqdm import tqdm

import shapely
import numpy as np
import pandas as pd
import geopandas as gpd

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
sns.set_theme()
sns.set_style("white")
sns.set_palette("rocket")

In [3]:
crime_data_path = "/Users/administrator/Documents/Projects/sf-crime-exploration/data/SFPD_Crime_Data_Full.csv"

start_time = time.time()
dataframe = pd.read_csv(crime_data_path)
end_time = time.time()
print("Entire operation took {} seconds.".format(end_time - start_time))

Entire operation took 2.6985280513763428 seconds.


In [4]:
print(dataframe.head(5))

         Date   Time Day of Week           Category Police District  \
0  2011/07/01  08:00      FRIDAY            WARRANT        NORTHERN   
1  2005/10/18  14:30     TUESDAY       DRUG OFFENSE      TENDERLOIN   
2  2005/01/29  13:45    SATURDAY  VEHICLE MISPLACED         BAYVIEW   
3  2011/06/02  02:52    THURSDAY       NON-CRIMINAL         CENTRAL   
4  2003/02/01  08:00    SATURDAY  VEHICLE MISPLACED         BAYVIEW   

    Latitude   Longitude  
0  37.802151 -122.439758  
1  37.779944 -122.414318  
2  37.737576 -122.388799  
3  37.803109 -122.414354  
4  37.724556 -122.401097  


In [6]:
# Get the unique columns.
list(dataframe["Category"].unique())

['WARRANT',
 'DRUG OFFENSE',
 'VEHICLE MISPLACED',
 'NON-CRIMINAL',
 'LARCENY THEFT',
 'BURGLARY',
 'OTHER OFFENSES',
 'VANDALISM',
 'PROSTITUTION',
 'SUSPICIOUS OCC',
 'DRUNKENNESS',
 'ARSON',
 'EMBEZZLEMENT',
 'FRAUD',
 'ASSAULT',
 'MISSING PERSON',
 'ROBBERY',
 'FORGERY AND COUNTERFEITING',
 'SECONDARY CODES',
 'SEX OFFENSE',
 'WEAPONS OFFENSE',
 'DISORDERLY CONDUCT',
 'STOLEN PROPERTY',
 'LOITERING',
 'SUICIDE',
 'TRESPASS',
 'LIQUOR LAWS',
 'BAD CHECKS',
 'KIDNAPPING',
 'RECOVERED VEHICLE',
 'DRIVING UNDER THE INFLUENCE',
 'FAMILY OFFENSE',
 'BRIBERY',
 'GAMBLING',
 'EXTORTION',
 'PORNOGRAPHY/OBSCENE MAT',
 'TREA',
 'LOST PROPERTY',
 'MALICIOUS MISCHIEF',
 'OTHER',
 'OFFENCES AGAINST THE FAMILY AND CHILDREN',
 'MISCELLANEOUS INVESTIGATION',
 'OTHER MISCELLANEOUS',
 'MOTOR VEHICLE THEFT',
 'CASE CLOSURE',
 'TRAFFIC VIOLATION ARREST',
 'FIRE REPORT',
 'TRAFFIC COLLISION',
 'WEAPONS CARRYING ETC',
 'COURTESY REPORT',
 'VEHICLE IMPOUNDED',
 'RAPE',
 'HUMAN TRAFFICKING (A), COMMERCIAL 

In [11]:
start_time = time.time()
filtered = dataframe[dataframe["Police District"] == "SOUTHERN"]
count = len(filtered[filtered["Category"] == "LARCENY THEFT"])
end_time = time.time()
print(count)
print("Entire operation took {} seconds.".format(end_time - start_time))

124306
Entire operation took 0.2665090560913086 seconds.


In [12]:
start_time = time.time()
districts = list(dataframe["Police District"].unique())
end_time = time.time()
print("First operation took {} seconds.".format(end_time - start_time))

start_time = time.time()
categories = list(dataframe["Category"].unique())
end_time = time.time()
print("Second operation took {} seconds.".format(end_time - start_time))

First operation took 0.18750786781311035 seconds.
Second operation took 0.14093589782714844 seconds.


In [16]:
start_time = time.time()
crimes_by_district = {}
for district in tqdm(districts):
    filtered_frame = dataframe[dataframe["Police District"] == district]
    crime_category_distribution = {}
    for category in categories:
        crime_category_distribution[category] = len(
            filtered_frame[filtered_frame["Category"] == category]
        )
    crimes_by_district[district] = crime_category_distribution
end_time = time.time()

print("\nEntire operation took {} seconds.".format(end_time - start_time))

100%|██████████| 10/10 [00:11<00:00,  1.17s/it]
Entire operation took 11.758426904678345 seconds.



In [17]:
print(crimes_by_district)

{'NORTHERN': {'WARRANT': 11292, 'DRUG OFFENSE': 8310, 'VEHICLE MISPLACED': 11635, 'NON-CRIMINAL': 28474, 'LARCENY THEFT': 101124, 'BURGLARY': 17254, 'OTHER OFFENSES': 28097, 'VANDALISM': 14281, 'PROSTITUTION': 3024, 'SUSPICIOUS OCC': 8933, 'DRUNKENNESS': 814, 'ARSON': 494, 'EMBEZZLEMENT': 360, 'FRAUD': 6602, 'ASSAULT': 20681, 'MISSING PERSON': 3693, 'ROBBERY': 7221, 'FORGERY AND COUNTERFEITING': 2873, 'SECONDARY CODES': 2319, 'SEX OFFENSE': 1107, 'WEAPONS OFFENSE': 1856, 'DISORDERLY CONDUCT': 2053, 'STOLEN PROPERTY': 2083, 'LOITERING': 401, 'SUICIDE': 215, 'TRESPASS': 2129, 'LIQUOR LAWS': 266, 'BAD CHECKS': 137, 'KIDNAPPING': 489, 'RECOVERED VEHICLE': 1822, 'DRIVING UNDER THE INFLUENCE': 583, 'FAMILY OFFENSE': 90, 'BRIBERY': 52, 'GAMBLING': 22, 'EXTORTION': 65, 'PORNOGRAPHY/OBSCENE MAT': 7, 'TREA': 1, 'LOST PROPERTY': 1421, 'MALICIOUS MISCHIEF': 3336, 'OTHER': 421, 'OFFENCES AGAINST THE FAMILY AND CHILDREN': 732, 'MISCELLANEOUS INVESTIGATION': 470, 'OTHER MISCELLANEOUS': 2804, 'MOTOR V