# Installing Dependencies, Importing Required Libraries and Loading Dataset

In [1]:
!pip install holidays
!pip install prophet==1.1.6



In [2]:
# Print the installed prophet package metadata (version, location, requirements).
!pip show prophet

Name: prophet
Version: 1.1.6
Summary: Automatic Forecasting Procedure
Home-page: https://facebook.github.io/prophet/
Author: 
Author-email: "Sean J. Taylor" <sjtz@pm.me>, Ben Letham <bletham@fb.com>
License: MIT
Location: /usr/local/lib/python3.11/dist-packages
Requires: cmdstanpy, holidays, importlib-resources, matplotlib, numpy, pandas, tqdm
Required-by: 


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import holidays
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
import folium
from folium.plugins import HeatMap
import plotly.express as px

# Lift pandas' display limits so neither columns nor rows are truncated.
# NOTE(review): with max_rows unset, displaying a large frame prints every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [4]:
# Read the 2010-2019 crime export into `df`.
# NOTE(review): absolute Google Drive path; presumably Drive has to be mounted
# at /content/drive first, and no mount cell is visible here.
raw_csv_path = '/content/drive/MyDrive/GDG/Research Paper/Datasets/Crime_Data_from_2010_to_2019.csv'
df = pd.read_csv(raw_csv_path)

# Data Preprocessing

In [7]:
# Preview the first rows of the raw frame to see the available columns.
df.head()

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,Mocodes,Vict Age,Vict Sex,Vict Descent,Premis Cd,Premis Desc,Weapon Used Cd,Weapon Desc,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,1307355,02/20/2010 12:00:00 AM,02/20/2010 12:00:00 AM,1350,13,Newton,1385,2,900,VIOLATION OF COURT ORDER,0913 1814 2000,48,M,H,501.0,SINGLE FAMILY DWELLING,,,AA,Adult Arrest,900.0,,,,300 E GAGE AV,,33.9825,-118.2695
1,11401303,09/13/2010 12:00:00 AM,09/12/2010 12:00:00 AM,45,14,Pacific,1485,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329,0,M,W,101.0,STREET,,,IC,Invest Cont,740.0,,,,SEPULVEDA BL,MANCHESTER AV,33.9599,-118.3962
2,70309629,08/09/2010 12:00:00 AM,08/09/2010 12:00:00 AM,1515,13,Newton,1324,2,946,OTHER MISCELLANEOUS CRIME,0344,0,M,H,103.0,ALLEY,,,IC,Invest Cont,946.0,,,,1300 E 21ST ST,,34.0224,-118.2524
3,90631215,01/05/2010 12:00:00 AM,01/05/2010 12:00:00 AM,150,6,Hollywood,646,2,900,VIOLATION OF COURT ORDER,1100 0400 1402,47,F,W,101.0,STREET,102.0,HAND GUN,IC,Invest Cont,900.0,998.0,,,CAHUENGA BL,HOLLYWOOD BL,34.1016,-118.3295
4,100100501,01/03/2010 12:00:00 AM,01/02/2010 12:00:00 AM,2100,1,Central,176,1,122,"RAPE, ATTEMPTED",0400,47,F,H,103.0,ALLEY,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,122.0,,,,8TH ST,SAN PEDRO ST,34.0387,-118.2488


In [10]:
# Columns retained for the analysis.
# NOTE(review): 'AREA ' carries a trailing space; presumably it mirrors the
# raw CSV header. Confirm before normalising it.
columns = [
    'DATE OCC',
    'TIME OCC',
    'AREA ',
    'AREA NAME',
    'Crm Cd',
    'Crm Cd Desc',
    'LOCATION',
    'LAT',
    'LON',
]

In [11]:
# Keep only the selected columns. The explicit .copy() makes `df` an
# independent frame, so the column assignments in the following cells do not
# raise SettingWithCopyWarning for writing into a slice of the raw data.
df = df[columns].copy()

In [12]:
# Parse the occurrence timestamp. The raw values look like
# '02/20/2010 12:00:00 AM', so the layout is stated explicitly rather than
# left to per-call format inference.
date = pd.to_datetime(df['DATE OCC'], format='%m/%d/%Y %I:%M:%S %p')

In [13]:
# Replace the raw date strings with the parsed timestamps held in `date`.
df['DATE OCC'] = date

In [14]:
def _hhmm_from_military(raw_time):
    """Render an integer such as 45 or 1350 as a zero-padded 'HH:MM' string."""
    padded = f"{raw_time:04d}"
    return padded[:2] + ':' + padded[2:]


# TIME OCC arrives as an integer in 24-hour HHMM form (e.g. 1350, 45).
# This cell expects integers, so it cannot be re-run on its own output.
df['TIME OCC'] = df['TIME OCC'].apply(_hhmm_from_military)

In [15]:
# Turn the 'HH:MM' strings into datetime.time objects.
parsed_times = pd.to_datetime(df['TIME OCC'], format='%H:%M')
df['TIME OCC'] = parsed_times.dt.time

In [16]:
# Split the occurrence date into separate Day / Month / Year columns.
occurred_on = df['DATE OCC']
for column_name, attribute in (('Day', 'day'), ('Month', 'month'), ('Year', 'year')):
    df[column_name] = getattr(occurred_on.dt, attribute)

df.head()

Unnamed: 0,DATE OCC,TIME OCC,AREA,AREA NAME,Crm Cd,Crm Cd Desc,LOCATION,LAT,LON,Day,Month,Year
0,2010-02-20,13:50:00,13,Newton,900,VIOLATION OF COURT ORDER,300 E GAGE AV,33.9825,-118.2695,20,2,2010
1,2010-09-12,00:45:00,14,Pacific,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",SEPULVEDA BL,33.9599,-118.3962,12,9,2010
2,2010-08-09,15:15:00,13,Newton,946,OTHER MISCELLANEOUS CRIME,1300 E 21ST ST,34.0224,-118.2524,9,8,2010
3,2010-01-05,01:50:00,6,Hollywood,900,VIOLATION OF COURT ORDER,CAHUENGA BL,34.1016,-118.3295,5,1,2010
4,2010-01-02,21:00:00,1,Central,122,"RAPE, ATTEMPTED",8TH ST,34.0387,-118.2488,2,1,2010


In [17]:
# Re-parse TIME OCC with the 'HH:MM:SS' layout and keep only the time part.
# NOTE(review): the '%H:%M' parse cell already stored datetime.time values in
# this column, so this pass looks redundant. Confirm before removing it.
reparsed_times = pd.to_datetime(df['TIME OCC'], format='%H:%M:%S')
df['TIME OCC'] = reparsed_times.dt.time

In [18]:
# Pull the hour and minute out of each time value in TIME OCC.
occurrence_times = df['TIME OCC']
df['Hour'] = occurrence_times.map(lambda moment: moment.hour)
df['Minute'] = occurrence_times.map(lambda moment: moment.minute)

In [19]:
# Hour-of-day buckets. Intervals are left-closed (right=False): hour 6 opens
# 'Early Morning' and hour 23 falls in the final [21, 24) 'Night' bucket.
labels = ['Late Night', 'Early Morning', 'Morning', 'Afternoon', 'Evening', 'Night']
bins = [0, 6, 9, 12, 17, 21, 24]

hour_of_day = df['Hour']
df['Time of Day'] = pd.cut(hour_of_day, bins, right=False, labels=labels)

In [20]:
# Name of the weekday on which the incident occurred (e.g. 'Saturday').
weekday_names = df['DATE OCC'].dt.day_name()
df['Day of Week'] = weekday_names

In [21]:
# pandas numbers weekdays from Monday=0, so 5 and 6 are Saturday and Sunday.
falls_on_weekend = df['DATE OCC'].dt.weekday >= 5
df['Is_Weekend'] = falls_on_weekend.astype(int)

In [22]:
# Build the US holiday calendar for exactly the years present in the data.
years_in_data = df['DATE OCC'].dt.year.unique()
us_holidays = holidays.US(years=years_in_data)

In [23]:
# Flag incidents whose calendar date is one of the dates in `us_holidays`.
occurrence_dates = df['DATE OCC'].dt.date
holiday_dates = us_holidays.keys()
df['Is_Holiday'] = occurrence_dates.isin(holiday_dates).astype(int)

In [26]:
# Final column order: date parts first, then time parts, then area, crime and
# location fields.
# NOTE(review): 'AREA ' carries a trailing space; presumably it mirrors the
# raw CSV header. Confirm before normalising it.
columns = [
    # date
    'DATE OCC', 'Day', 'Month', 'Year', 'Day of Week', 'Is_Weekend', 'Is_Holiday',
    # time
    'TIME OCC', 'Hour', 'Minute', 'Time of Day',
    # area
    'AREA ', 'AREA NAME',
    # crime
    'Crm Cd', 'Crm Cd Desc',
    # location
    'LOCATION', 'LAT', 'LON',
]

In [27]:
# Reorder the frame to the column order listed in `columns`. The explicit
# .copy() makes the result an independent frame, so the column assignments in
# the following cells do not raise SettingWithCopyWarning on a slice.
df = df[columns].copy()

In [28]:
# Coarse crime group -> keywords looked up as substrings of 'Crm Cd Desc'.
# Groups are listed in the order the matcher iterates them; keep it stable.
# NOTE(review): 'PIMPING' and 'HUMAN TRAFFICKING' sit under 'Drug Offenses';
# confirm that grouping is intended.
crime_groups = {
    'Vehicle Theft': [
        'VEHICLE - STOLEN',
        'BIKE - STOLEN',
        'VEHICLE, STOLEN - OTHER',
        'VEHICLE - ATTEMPT STOLEN',
    ],
    'Theft': [
        'SHOPLIFTING-GRAND THEFT',
        'SHOPLIFTING - PETTY THEFT',
        'THEFT-GRAND',
        'THEFT FROM MOTOR VEHICLE',
        'THEFT FROM PERSON',
    ],
    'Burglary': ['BURGLARY', 'BURGLARY FROM VEHICLE', 'BURGLARY, ATTEMPTED'],
    'Assault': [
        'ASSAULT',
        'BATTERY',
        'AGGRAVATED ASSAULT',
        'INTIMATE PARTNER - SIMPLE ASSAULT',
    ],
    'Robbery': ['ROBBERY', 'ATTEMPTED ROBBERY'],
    'Vandalism': ['VANDALISM', 'VANDALISM - FELONY'],
    'Drug Offenses': ['DRUGS', 'PIMPING', 'HUMAN TRAFFICKING'],
    'Sexual Offenses': [
        'RAPE',
        'SEX OFFENDER',
        'SEXUAL ASSAULT',
        'SEXUAL PENETRATION W/FOREIGN OBJECT',
        'BATTERY WITH SEXUAL CONTACT',
    ],
    'Violence': ['MANSLAUGHTER', 'HOMICIDE', 'STABBING', 'SHOTS FIRED'],
    'Fraud': [
        'FRAUD',
        'DOCUMENT FORGERY',
        'COUNTERFEIT',
        'EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)',
        'EMBEZZLEMENT, PETTY THEFT ($950 & UNDER)',
    ],
    'Other': [
        'OTHER MISCELLANEOUS CRIME',
        'TRESPASSING',
        'FAILURE TO YIELD',
        'DISTURBING THE PEACE',
    ],
    'Weapons/Arms': ['WEAPONS POSSESSION/BOMBING', 'BRANDISH WEAPON'],
    'Kidnapping': ['KIDNAPPING', 'KIDNAPPING - GRAND ATTEMPT'],
    'Extortion': ['EXTORTION'],
    'Cyber Crimes': ['UNAUTHORIZED COMPUTER ACCESS'],
    'Public Disorder': ['LYNCHING'],
    'Stalking': ['STALKING'],
}

In [29]:
def categorize_crime(crime_desc, groups=None):
    """Map a raw crime description to one of the coarse crime groups.

    A description belongs to a group when it contains one of that group's
    keywords as a substring. If keywords from several groups match, the
    longest (most specific) keyword decides, so 'BATTERY WITH SEXUAL CONTACT'
    is filed under 'Sexual Offenses' rather than being captured by the shorter
    'BATTERY' keyword of 'Assault'. Keywords of equal length keep the group
    that comes first in the mapping.

    Parameters
    ----------
    crime_desc : str
        Raw description text, e.g. a value of the 'Crm Cd Desc' column.
    groups : dict[str, list[str]], optional
        Mapping of group name to keywords. Defaults to the notebook-level
        ``crime_groups``.

    Returns
    -------
    str
        The matching group name, or 'Other' when no keyword matches or
        ``crime_desc`` is not a string (for example a missing value).
    """
    if groups is None:
        groups = crime_groups

    # Missing descriptions show up as float NaN; `in` would raise on them.
    if not isinstance(crime_desc, str):
        return 'Other'

    best_group = 'Other'
    best_length = 0
    for group, keywords in groups.items():
        for keyword in keywords:
            # Strict '>' keeps the earliest group when lengths are equal.
            if len(keyword) > best_length and keyword in crime_desc:
                best_group = group
                best_length = len(keyword)
    return best_group

In [30]:
# Collapse each raw description into its coarse crime group, in place.
# Running this cell a second time would turn every value into 'Other',
# because the group names contain none of the upper-case keywords.
crime_group_labels = df['Crm Cd Desc'].map(categorize_crime)
df['Crm Cd Desc'] = crime_group_labels

In [31]:
# Overwrite the original crime code with an integer id per crime group.
# Ids follow the order in which groups first appear in the frame.
group_codes, group_names = pd.factorize(df['Crm Cd Desc'])
df['Crm Cd'] = group_codes

In [32]:
# Preview the cleaned frame with the grouped crime labels and new codes.
df.head()

Unnamed: 0,DATE OCC,Day,Month,Year,Day of Week,Is_Weekend,Is_Holiday,TIME OCC,Hour,Minute,Time of Day,AREA,AREA NAME,Crm Cd,Crm Cd Desc,LOCATION,LAT,LON
0,2010-02-20,20,2,2010,Saturday,1,0,13:50:00,13,50,Afternoon,13,Newton,0,Other,300 E GAGE AV,33.9825,-118.2695
1,2010-09-12,12,9,2010,Sunday,1,0,00:45:00,0,45,Late Night,14,Pacific,1,Vandalism,SEPULVEDA BL,33.9599,-118.3962
2,2010-08-09,9,8,2010,Monday,0,0,15:15:00,15,15,Afternoon,13,Newton,0,Other,1300 E 21ST ST,34.0224,-118.2524
3,2010-01-05,5,1,2010,Tuesday,0,0,01:50:00,1,50,Late Night,6,Hollywood,0,Other,CAHUENGA BL,34.1016,-118.3295
4,2010-01-02,2,1,2010,Saturday,1,0,21:00:00,21,0,Night,1,Central,2,Sexual Offenses,8TH ST,34.0387,-118.2488


In [33]:
# Write the cleaned frame to CSV without the row index.
# NOTE(review): absolute Google Drive path; presumably Drive has to be mounted
# at /content/drive for this to succeed.
clean_csv_path = '/content/drive/MyDrive/GDG/Research Paper/Datasets/Clean_New_LA.csv'
df.to_csv(clean_csv_path, index=False)