In [1]:
# Dependencies
import pandas as pd
from pathlib import Path
import numpy as np
import datetime as dt
import re

In [4]:
# Name of the CSV file
file = Path('Crime_Data_from_2020_to_Present_20241017.csv')

In [5]:
# The correct encoding must be used to read the CSV in pandas
df = pd.read_csv(file)

In [6]:
# Preview of the DataFrame
# Note that Memo_CD is likely a meaningless column
df.head()

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,AA,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,IC,Invest Cont,330.0,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,IC,Invest Cont,480.0,,,,1400 W 37TH ST,,34.021,-118.3002
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,IC,Invest Cont,343.0,,,,14000 RIVERSIDE DR,,34.1576,-118.4387
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,IC,Invest Cont,354.0,,,,1900 TRANSIENT,,34.0944,-118.3277


In [7]:
df.dtypes

DR_NO               int64
Date Rptd          object
DATE OCC           object
TIME OCC            int64
AREA                int64
AREA NAME          object
Rpt Dist No         int64
Part 1-2            int64
Crm Cd              int64
Crm Cd Desc        object
Mocodes            object
Vict Age            int64
Vict Sex           object
Vict Descent       object
Premis Cd         float64
Premis Desc        object
Weapon Used Cd    float64
Weapon Desc        object
Status             object
Status Desc        object
Crm Cd 1          float64
Crm Cd 2          float64
Crm Cd 3          float64
Crm Cd 4          float64
LOCATION           object
Cross Street       object
LAT               float64
LON               float64
dtype: object

In [8]:
# Identify incomplete rows
df.count()

DR_NO             986500
Date Rptd         986500
DATE OCC          986500
TIME OCC          986500
AREA              986500
AREA NAME         986500
Rpt Dist No       986500
Part 1-2          986500
Crm Cd            986500
Crm Cd Desc       986500
Mocodes           840065
Vict Age          986500
Vict Sex          846925
Vict Descent      846914
Premis Cd         986486
Premis Desc       985915
Weapon Used Cd    326368
Weapon Desc       326368
Status            986499
Status Desc       986500
Crm Cd 1          986489
Crm Cd 2           68912
Crm Cd 3            2310
Crm Cd 4              64
LOCATION          986500
Cross Street      152270
LAT               986500
LON               986500
dtype: int64

In [9]:
crime_codes = df['Crm Cd'].unique()
crime_codes

array([510, 330, 480, 343, 354, 624, 821, 812, 230, 956, 341, 930, 668,
       420, 813, 440, 626, 762, 441, 310, 331, 210, 662, 860, 236, 820,
       661, 810, 901, 442, 740, 946, 761, 649, 845, 121, 745, 627, 653,
       928, 815, 940, 625, 352, 648, 886, 666, 921, 805, 932, 900, 903,
       439, 954, 434, 235, 220, 654, 922, 760, 670, 850, 237, 763, 345,
       888, 320, 122, 753, 822, 520, 806, 906, 437, 410, 350, 623, 522,
       450, 890, 755, 231, 664, 251, 951, 920, 250, 470, 902, 647, 651,
       910, 110, 351, 421, 444, 814, 756, 433, 931, 435, 438, 443, 660,
       950, 622, 943, 487, 949, 933, 865, 474, 652, 113, 446, 475, 471,
       451, 436, 485, 944, 349, 942, 347, 353, 870, 473, 880, 452, 924,
       840, 948, 884, 904, 830, 432, 882, 445, 926, 453], dtype=int64)

In [10]:
len(crime_codes)

140

In [11]:
crime_codes1 = df['Crm Cd 1'].unique()
crime_codes1

array([510., 330., 480., 343., 354., 624., 812., 230., 956., 341., 930.,
       668., 420., 813., 440., 626., 762., 441., 310., 331., 210., 662.,
       860., 236., 661., 810., 901., 442., 740., 946., 761., 649., 845.,
       121., 745., 627., 653., 821., 928., 815., 940., 625., 352., 648.,
       886., 666., 921., 805., 932., 900., 820., 903., 439., 760., 954.,
       434., 235., 220., 654., 922., 670., 850., 237., 763., 345., 888.,
       320., 122., 753., 822., 520., 806., 906., 437., 410., 350., 623.,
       522., 450., 890., 755., 231., 664., 251., 951., 920., 250., 470.,
       902., 647., 651., 910., 110., 351., 421., 444., 814., 756., 433.,
       931., 435., 438., 443., 660., 950., 622., 943., 487.,  nan, 949.,
       933., 865., 474., 652., 113., 446., 475., 471., 451., 436., 521.,
       485., 944., 349., 942., 347., 353., 430., 870., 473., 880., 452.,
       924., 840., 948., 884., 904., 830., 432., 882., 445., 926., 453.])

In [12]:
len(crime_codes1)

143

In [13]:
crime_desc = df['Crm Cd Desc'].unique()
crime_desc

array(['VEHICLE - STOLEN', 'BURGLARY FROM VEHICLE', 'BIKE - STOLEN',
       'SHOPLIFTING-GRAND THEFT ($950.01 & OVER)', 'THEFT OF IDENTITY',
       'BATTERY - SIMPLE ASSAULT',
       'SODOMY/SEXUAL CONTACT B/W PENIS OF ONE PERS TO ANUS OTH',
       'CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)',
       'ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT',
       'LETTERS, LEWD  -  TELEPHONE CALLS, LEWD',
       'THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD',
       'CRIMINAL THREATS - NO WEAPON DISPLAYED',
       'EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)',
       'THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)',
       'CHILD ANNOYING (17YRS & UNDER)',
       'THEFT PLAIN - PETTY ($950 & UNDER)',
       'INTIMATE PARTNER - SIMPLE ASSAULT', 'LEWD CONDUCT',
       'THEFT PLAIN - ATTEMPT', 'BURGLARY',
       'THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)', 'ROBBERY',
       'BUNCO, GRAND THEFT', 'BATTERY WITH SEXUAL CONTACT',
       'INTIMATE PARTNER - AGGRAVA

In [14]:
df.describe(include=['object'])

Unnamed: 0,Date Rptd,DATE OCC,AREA NAME,Crm Cd Desc,Mocodes,Vict Sex,Vict Descent,Premis Desc,Weapon Desc,Status,Status Desc,LOCATION,Cross Street
count,986500,986500,986500,986500,840065,846925,846914,985915,326368,986499,986500,986500,152270
unique,1749,1749,21,140,309567,5,20,306,79,6,6,66322,10337
top,02/02/2023 12:00:00 AM,01/01/2020 12:00:00 AM,Central,VEHICLE - STOLEN,344,M,H,STREET,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,800 N ALAMEDA ST,BROADWAY
freq,929,1157,68166,111632,41144,397948,293088,254978,174484,788335,788335,2556,2467


In [15]:
renamed_data = df.rename(columns={"DR_NO" : "dr_no","DATE OCC": "date_occ", "TIME OCC":"time_occ", "AREA":"division", "AREA NAME":"div_name", "Crm Cd": "crime_id", "Crm Cd Desc":"crime_desc","Vict Age":"vict_age","Vict Sex":"vict_sex","Vict Descent":"vict_descent", "Crm Cd 1":"crime_id1","LOCATION":"loc_desc", "LAT":"lat", "LON":"lon"})
renamed_data.count()

dr_no             986500
Date Rptd         986500
date_occ          986500
time_occ          986500
division          986500
div_name          986500
Rpt Dist No       986500
Part 1-2          986500
crime_id          986500
crime_desc        986500
Mocodes           840065
vict_age          986500
vict_sex          846925
vict_descent      846914
Premis Cd         986486
Premis Desc       985915
Weapon Used Cd    326368
Weapon Desc       326368
Status            986499
Status Desc       986500
crime_id1         986489
Crm Cd 2           68912
Crm Cd 3            2310
Crm Cd 4              64
loc_desc          986500
Cross Street      152270
lat               986500
lon               986500
dtype: int64

In [16]:
# Create a "year","month" and "day" column with the "occ_date" column.
ordered_data1 = renamed_data.copy()
ordered_data1[["day", "month", "year"]] = ordered_data1["date_occ"].str.split('/', n=3, expand=True)

# Drop the contact_name column
ordered_data1.head()

Unnamed: 0,dr_no,Date Rptd,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 2,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506,3,1,2020 12:00:00 AM
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628,2,8,2020 12:00:00 AM
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,,,,1400 W 37TH ST,,34.021,-118.3002,11,4,2020 12:00:00 AM
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,,,,14000 RIVERSIDE DR,,34.1576,-118.4387,3,10,2020 12:00:00 AM
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,,,,1900 TRANSIENT,,34.0944,-118.3277,8,17,2020 12:00:00 AM


In [17]:
ordered_data1[["year", "tail"]] = ordered_data1["year"].str.split(' ', n=1, expand=True)

# Drop the contact_name column
ordered_data1.head()

Unnamed: 0,dr_no,Date Rptd,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year,tail
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,,,1900 S LONGWOOD AV,,34.0375,-118.3506,3,1,2020,12:00:00 AM
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,,,1000 S FLOWER ST,,34.0444,-118.2628,2,8,2020,12:00:00 AM
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,,,1400 W 37TH ST,,34.021,-118.3002,11,4,2020,12:00:00 AM
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,,,14000 RIVERSIDE DR,,34.1576,-118.4387,3,10,2020,12:00:00 AM
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,,,1900 TRANSIENT,,34.0944,-118.3277,8,17,2020,12:00:00 AM


In [18]:
ordered_data1 = ordered_data1.drop(columns = ['tail'])
ordered_data1.head()

Unnamed: 0,dr_no,Date Rptd,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 2,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506,3,1,2020
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628,2,8,2020
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,,,,1400 W 37TH ST,,34.021,-118.3002,11,4,2020
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,,,,14000 RIVERSIDE DR,,34.1576,-118.4387,3,10,2020
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,,,,1900 TRANSIENT,,34.0944,-118.3277,8,17,2020


In [19]:
years = ordered_data1['year'].unique()
years

array(['2020', '2021', '2022', '2023', '2024'], dtype=object)

In [20]:
renamed_data1 = ordered_data1.rename(columns={"DR_NO" : "dr_no","Date Rptd": "date_rpt","DATE OCC": "date_occ", "TIME OCC":"time_occ", "AREA":"division", "AREA NAME":"div_name", "Crm Cd": "crime_id", "Crm Cd Desc":"crime_desc","Vict Age":"vict_age","Vict Sex":"vict_sex","Vict Descent":"vict_descent", "Crm Cd 1":"crime_id1","LOCATION":"loc_desc", "LAT":"lat", "LON":"lon"})
renamed_data1.head()

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 2,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506,3,1,2020
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628,2,8,2020
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,,,,1400 W 37TH ST,,34.021,-118.3002,11,4,2020
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,,,,14000 RIVERSIDE DR,,34.1576,-118.4387,3,10,2020
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,,,,1900 TRANSIENT,,34.0944,-118.3277,8,17,2020


In [21]:
filtered_data1 = renamed_data1.copy()
filtered_data1 = filtered_data1.loc[(filtered_data1["year"] >= '2023' )]
filtered_data1 

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 2,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year
644787,230611294,06/17/2023 12:00:00 AM,06/17/2023 12:00:00 AM,1140,6,Hollywood,668,1,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),...,,,,5500 W SUNSET BL,,34.0981,-118.3092,06,17,2023
644788,230106843,02/12/2023 12:00:00 AM,02/11/2023 12:00:00 AM,1630,1,Central,191,1,440,THEFT PLAIN - PETTY ($950 & UNDER),...,,,,1400 WRIGHT ST,,34.0396,-118.2726,02,11,2023
644789,231515059,09/19/2023 12:00:00 AM,09/19/2023 12:00:00 AM,1030,15,N Hollywood,1549,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,761.0,,,BURBANK BL,CAHUENGA BL,34.1721,-118.3616,09,19,2023
644790,230110347,04/11/2023 12:00:00 AM,04/10/2023 12:00:00 AM,816,1,Central,192,1,331,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,...,,,,1300 S FLOWER ST,,34.0401,-118.2669,04,10,2023
644791,230618504,12/01/2023 12:00:00 AM,12/01/2023 12:00:00 AM,110,6,Hollywood,639,2,626,INTIMATE PARTNER - SIMPLE ASSAULT,...,,,,5400 RUSSELL AV,,34.1042,-118.3072,12,01,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
986495,242011172,08/20/2024 12:00:00 AM,08/17/2024 12:00:00 AM,2300,20,Olympic,2033,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,,,,3700 WILSHIRE BL,,34.0617,-118.3066,08,17,2024
986496,240710284,07/24/2024 12:00:00 AM,07/23/2024 12:00:00 AM,1400,7,Wilshire,788,1,510,VEHICLE - STOLEN,...,,,,4000 W 23RD ST,,34.0362,-118.3284,07,23,2024
986497,240104953,01/15/2024 12:00:00 AM,01/15/2024 12:00:00 AM,100,1,Central,101,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,,,,1300 W SUNSET BL,,34.0685,-118.2460,01,15,2024
986498,240309674,04/24/2024 12:00:00 AM,04/24/2024 12:00:00 AM,1500,3,Southwest,358,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,,,,FLOWER ST,JEFFERSON BL,34.0215,-118.2868,04,24,2024


In [22]:
filtered_data2 = filtered_data1.copy()
filtered_data2 = filtered_data1.loc[(filtered_data1["month"] >= '09' )]
filtered_data2

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,division,div_name,Rpt Dist No,Part 1-2,crime_id,crime_desc,...,Crm Cd 2,Crm Cd 3,Crm Cd 4,loc_desc,Cross Street,lat,lon,day,month,year
644787,230611294,06/17/2023 12:00:00 AM,06/17/2023 12:00:00 AM,1140,6,Hollywood,668,1,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),...,,,,5500 W SUNSET BL,,34.0981,-118.3092,06,17,2023
644788,230106843,02/12/2023 12:00:00 AM,02/11/2023 12:00:00 AM,1630,1,Central,191,1,440,THEFT PLAIN - PETTY ($950 & UNDER),...,,,,1400 WRIGHT ST,,34.0396,-118.2726,02,11,2023
644789,231515059,09/19/2023 12:00:00 AM,09/19/2023 12:00:00 AM,1030,15,N Hollywood,1549,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,761.0,,,BURBANK BL,CAHUENGA BL,34.1721,-118.3616,09,19,2023
644790,230110347,04/11/2023 12:00:00 AM,04/10/2023 12:00:00 AM,816,1,Central,192,1,331,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,...,,,,1300 S FLOWER ST,,34.0401,-118.2669,04,10,2023
644792,242105761,02/13/2024 12:00:00 AM,12/17/2023 12:00:00 AM,730,21,Topanga,2149,1,210,ROBBERY,...,,,,6600 QUARTZ AV,,34.1917,-118.5641,12,17,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
986495,242011172,08/20/2024 12:00:00 AM,08/17/2024 12:00:00 AM,2300,20,Olympic,2033,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,,,,3700 WILSHIRE BL,,34.0617,-118.3066,08,17,2024
986496,240710284,07/24/2024 12:00:00 AM,07/23/2024 12:00:00 AM,1400,7,Wilshire,788,1,510,VEHICLE - STOLEN,...,,,,4000 W 23RD ST,,34.0362,-118.3284,07,23,2024
986497,240104953,01/15/2024 12:00:00 AM,01/15/2024 12:00:00 AM,100,1,Central,101,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,,,,1300 W SUNSET BL,,34.0685,-118.2460,01,15,2024
986498,240309674,04/24/2024 12:00:00 AM,04/24/2024 12:00:00 AM,1500,3,Southwest,358,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,,,,FLOWER ST,JEFFERSON BL,34.0215,-118.2868,04,24,2024


In [23]:
data_cleaned = filtered_data2.drop(columns = ['Rpt Dist No','Part 1-2', 'Mocodes', 'Premis Cd','Premis Desc','Weapon Used Cd','Weapon Desc','Status','Status Desc','crime_id1','Crm Cd 2','Crm Cd 3','Crm Cd 4','Cross Street'])
data_cleaned

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,division,div_name,crime_id,crime_desc,vict_age,vict_sex,vict_descent,loc_desc,lat,lon,day,month,year
644787,230611294,06/17/2023 12:00:00 AM,06/17/2023 12:00:00 AM,1140,6,Hollywood,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),0,X,X,5500 W SUNSET BL,34.0981,-118.3092,06,17,2023
644788,230106843,02/12/2023 12:00:00 AM,02/11/2023 12:00:00 AM,1630,1,Central,440,THEFT PLAIN - PETTY ($950 & UNDER),48,M,H,1400 WRIGHT ST,34.0396,-118.2726,02,11,2023
644789,231515059,09/19/2023 12:00:00 AM,09/19/2023 12:00:00 AM,1030,15,N Hollywood,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",36,F,W,BURBANK BL,34.1721,-118.3616,09,19,2023
644790,230110347,04/11/2023 12:00:00 AM,04/10/2023 12:00:00 AM,816,1,Central,331,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,25,F,H,1300 S FLOWER ST,34.0401,-118.2669,04,10,2023
644792,242105761,02/13/2024 12:00:00 AM,12/17/2023 12:00:00 AM,730,21,Topanga,210,ROBBERY,31,F,H,6600 QUARTZ AV,34.1917,-118.5641,12,17,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
986495,242011172,08/20/2024 12:00:00 AM,08/17/2024 12:00:00 AM,2300,20,Olympic,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",36,F,C,3700 WILSHIRE BL,34.0617,-118.3066,08,17,2024
986496,240710284,07/24/2024 12:00:00 AM,07/23/2024 12:00:00 AM,1400,7,Wilshire,510,VEHICLE - STOLEN,0,,,4000 W 23RD ST,34.0362,-118.3284,07,23,2024
986497,240104953,01/15/2024 12:00:00 AM,01/15/2024 12:00:00 AM,100,1,Central,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),0,X,X,1300 W SUNSET BL,34.0685,-118.2460,01,15,2024
986498,240309674,04/24/2024 12:00:00 AM,04/24/2024 12:00:00 AM,1500,3,Southwest,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",70,F,W,FLOWER ST,34.0215,-118.2868,04,24,2024


In [24]:
data_cleaned.columns

Index(['dr_no', 'date_rpt', 'date_occ', 'time_occ', 'division', 'div_name',
       'crime_id', 'crime_desc', 'vict_age', 'vict_sex', 'vict_descent',
       'loc_desc', 'lat', 'lon', 'day', 'month', 'year'],
      dtype='object')

In [64]:
# Reorder the columns
data_cleaned = data_cleaned.reindex(columns=['dr_no', 'date_rpt', 'date_occ', 'time_occ','year','month','day', 'division', 'div_name',
       'crime_id', 'crime_desc', 'vict_age', 'vict_sex', 'vict_descent',
       'loc_desc', 'lat', 'lon'])
data_cleaned.head(20)

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,year,month,day,division,div_name,crime_id,crime_desc,vict_age,vict_sex,vict_descent,loc_desc,lat,lon
644787,230611294,06/17/2023 12:00:00 AM,06/17/2023 12:00:00 AM,1140,2023,17,6,6,Hollywood,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),0,X,X,5500 W SUNSET BL,34.0981,-118.3092
644788,230106843,02/12/2023 12:00:00 AM,02/11/2023 12:00:00 AM,1630,2023,11,2,1,Central,440,THEFT PLAIN - PETTY ($950 & UNDER),48,M,H,1400 WRIGHT ST,34.0396,-118.2726
644789,231515059,09/19/2023 12:00:00 AM,09/19/2023 12:00:00 AM,1030,2023,19,9,15,N Hollywood,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",36,F,W,BURBANK BL,34.1721,-118.3616
644790,230110347,04/11/2023 12:00:00 AM,04/10/2023 12:00:00 AM,816,2023,10,4,1,Central,331,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,25,F,H,1300 S FLOWER ST,34.0401,-118.2669
644792,242105761,02/13/2024 12:00:00 AM,12/17/2023 12:00:00 AM,730,2023,17,12,21,Topanga,210,ROBBERY,31,F,H,6600 QUARTZ AV,34.1917,-118.5641
644794,230511288,07/20/2023 12:00:00 AM,07/20/2023 12:00:00 AM,20,2023,20,7,5,Harbor,901,VIOLATION OF RESTRAINING ORDER,51,M,H,1600 W LOMITA BL,33.7978,-118.3043
644796,230514872,10/19/2023 12:00:00 AM,10/19/2023 12:00:00 AM,1520,2023,19,10,5,Harbor,761,BRANDISH WEAPON,38,F,H,600 N NEPTUNE AV,33.7795,-118.2715
644797,232005166,01/21/2023 12:00:00 AM,01/21/2023 12:00:00 AM,40,2023,21,1,20,Olympic,352,PICKPOCKET,26,M,H,3700 WILSHIRE BL,34.0617,-118.3066
644798,230107118,02/13/2023 12:00:00 AM,02/12/2023 12:00:00 AM,2100,2023,12,2,1,Central,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",47,F,B,1500 S FIGUEROA ST,34.0388,-118.2717
644799,231225821,12/30/2023 12:00:00 AM,12/29/2023 12:00:00 AM,2200,2023,29,12,12,77th Street,510,VEHICLE - STOLEN,0,,,7800 CRENSHAW BL,33.9691,-118.3309


In [26]:
dates_cleaned = data_cleaned.copy()
dates_cleaned['date_occ'] = pd.to_datetime(dates_cleaned['date_occ'], unit='ns')
dates_cleaned['date_rpt'] = pd.to_datetime(dates_cleaned['date_rpt'], unit='ns')
dates_cleaned.dtypes

dr_no                    int64
date_rpt        datetime64[ns]
date_occ        datetime64[ns]
time_occ                 int64
year                    object
month                   object
day                     object
division                 int64
div_name                object
crime_id                 int64
crime_desc              object
vict_age                 int64
vict_sex                object
vict_descent            object
loc_desc                object
lat                    float64
lon                    float64
dtype: object

In [27]:
dates_cleaned.to_json('dates_cleaned.json', orient='records')

In [28]:
totals_division = dates_cleaned['division'].value_counts()
totals_division

division
1     18490
14    14822
3     14514
12    14206
15    12922
7     12322
6     12096
20    12077
13    11817
2     11657
18    11266
8     10947
17    10717
9     10700
10    10431
21    10349
11    10226
5      9734
19     9714
4      8541
16     7532
Name: count, dtype: int64

In [29]:
totals_crime = dates_cleaned['crime_id'].value_counts()
totals_crime

crime_id
510    31266
624    17521
330    15427
740    14371
310    13576
       ...  
840        1
830        1
436        1
926        1
453        1
Name: count, Length: 134, dtype: int64

In [30]:
totals_crime = dates_cleaned['crime_desc'].value_counts()
totals_crime

crime_desc
VEHICLE - STOLEN                                            31266
BATTERY - SIMPLE ASSAULT                                    17521
BURGLARY FROM VEHICLE                                       15427
VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)     14371
BURGLARY                                                    13576
                                                            ...  
BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM        1
INCEST (SEXUAL ACTS BETWEEN BLOOD RELATIVES)                    1
LYNCHING - ATTEMPTED                                            1
TRAIN WRECKING                                                  1
DRUNK ROLL - ATTEMPT                                            1
Name: count, Length: 134, dtype: int64

In [31]:
groups_crime = dates_cleaned.groupby(["crime_id"])
groups_crime.count()

Unnamed: 0_level_0,dr_no,date_rpt,date_occ,time_occ,year,month,day,division,div_name,crime_desc,vict_age,vict_sex,vict_descent,loc_desc,lat,lon
crime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
110,309,309,309,309,309,309,309,309,309,309,309,307,307,309,309,309
121,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720
122,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66
210,7440,7440,7440,7440,7440,7440,7440,7440,7440,7440,7440,7436,7436,7440,7440,7440
220,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1016,1016,1018,1018,1018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
949,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10
950,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
951,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88
954,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8


In [32]:
crime_details = dates_cleaned.loc[(dates_cleaned["crime_id"] == 954)]
crime_details

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,year,month,day,division,div_name,crime_id,crime_desc,vict_age,vict_sex,vict_descent,loc_desc,lat,lon
672733,231717551,2023-10-25,2023-10-21,1600,2023,21,10,17,Devonshire,954,CONTRIBUTING,16,F,O,17900 DEVONSHIRE ST,34.2611,-118.5158
698761,231715436,2023-09-19,2023-09-16,1630,2023,16,9,17,Devonshire,954,CONTRIBUTING,12,F,B,10200 BALBOA BL,34.2557,-118.5023
708915,231704522,2023-01-13,2023-01-13,2000,2023,13,1,17,Devonshire,954,CONTRIBUTING,13,F,B,9600 WILBUR AV,34.2448,-118.5448
713668,231017632,2023-12-27,2023-10-09,2037,2023,9,10,10,West Valley,954,CONTRIBUTING,16,M,H,15700 SATICOY ST,34.2085,-118.4749
781071,230911005,2023-06-24,2023-06-24,800,2023,24,6,9,Van Nuys,954,CONTRIBUTING,13,F,O,12900 HESBY ST,34.1631,-118.4151
862253,230513347,2023-09-10,2023-09-09,2215,2023,9,9,5,Harbor,954,CONTRIBUTING,16,F,H,700 W 30TH ST,33.7178,-118.2975
877174,241007440,2024-03-26,2024-03-13,800,2024,13,3,10,West Valley,954,CONTRIBUTING,16,F,O,15700 COVELLO ST,34.2075,-118.476
884253,241604803,2024-01-27,2024-01-26,1630,2024,26,1,16,Foothill,954,CONTRIBUTING,12,F,H,13600 VAN NUYS BL,34.2612,-118.4288


In [33]:
crime_desc2 = dates_cleaned['crime_desc'].unique()
len(crime_desc2)

134

In [34]:
crime_id2 = dates_cleaned['crime_id'].unique()
len(crime_id2)

134

In [35]:
crimes = dates_cleaned.loc[(dates_cleaned["crime_id"] > 100) & (dates_cleaned["crime_id"] < 200) ]
crimes

Unnamed: 0,dr_no,date_rpt,date_occ,time_occ,year,month,day,division,div_name,crime_id,crime_desc,vict_age,vict_sex,vict_descent,loc_desc,lat,lon
645222,230712760,2023-07-31,2023-07-31,100,2023,31,07,7,Wilshire,110,CRIMINAL HOMICIDE,35,F,W,400 S COCHRAN AV,34.0669,-118.3472
645324,232008108,2023-04-12,2023-04-12,2215,2023,12,04,20,Olympic,110,CRIMINAL HOMICIDE,17,M,H,700 S BERENDO ST,34.0598,-118.2941
645448,230127049,2023-12-23,2023-12-23,213,2023,23,12,1,Central,110,CRIMINAL HOMICIDE,65,M,B,5TH,34.0435,-118.2427
645470,231301019,2023-09-15,2023-09-13,1900,2023,13,09,13,Newton,121,"RAPE, FORCIBLE",42,F,B,16TH ST,34.0232,-118.2409
646201,230615479,2023-09-18,2023-09-14,15,2023,14,09,6,Hollywood,121,"RAPE, FORCIBLE",23,F,B,WILCOX,34.1016,-118.3310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983728,240308934,2024-04-07,2024-03-29,2000,2024,29,03,3,Southwest,121,"RAPE, FORCIBLE",19,F,H,3000 S LA BREA AV,34.0272,-118.3537
983919,241508434,2024-03-31,2024-03-26,1500,2024,26,03,15,N Hollywood,121,"RAPE, FORCIBLE",19,F,O,5300 LANKERSHIM BL,34.1670,-118.3759
984383,241509376,2024-04-22,2024-04-22,510,2024,22,04,15,N Hollywood,110,CRIMINAL HOMICIDE,67,F,H,3900 LANKERSHIM BL,34.1393,-118.3620
986194,240208123,2024-03-20,2024-02-27,1200,2024,27,02,2,Rampart,121,"RAPE, FORCIBLE",0,F,B,1300 ANGELINA ST,34.0648,-118.2559


In [36]:
crimes['crime_id'].unique()

array([110, 121, 122], dtype=int64)

In [37]:
crimes['crime_desc'].unique()

array(['CRIMINAL HOMICIDE', 'RAPE, FORCIBLE', 'RAPE, ATTEMPTED'],
      dtype=object)

In [38]:
crimes2 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 200) & (dates_cleaned["crime_id"] < 300) ]

In [39]:
crimes2['crime_id'].unique()

array([230, 210, 236, 220, 231, 237, 235, 251, 250], dtype=int64)

In [40]:
crimes2['crime_desc'].unique()

array(['ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT', 'ROBBERY',
       'INTIMATE PARTNER - AGGRAVATED ASSAULT', 'ATTEMPTED ROBBERY',
       'ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER',
       'CHILD NEGLECT (SEE 300 W.I.C.)',
       'CHILD ABUSE (PHYSICAL) - AGGRAVATED ASSAULT',
       'SHOTS FIRED AT INHABITED DWELLING',
       'SHOTS FIRED AT MOVING VEHICLE, TRAIN OR AIRCRAFT'], dtype=object)

In [41]:
crimes3 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 300) & (dates_cleaned["crime_id"] < 400) ]

In [42]:
crimes3['crime_id'].unique()

array([331, 352, 341, 310, 354, 330, 350, 320, 343, 351, 353, 349, 347,
       345], dtype=int64)

In [43]:
crimes3['crime_desc'].unique()

array(['THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)',
       'PICKPOCKET',
       'THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD',
       'BURGLARY', 'THEFT OF IDENTITY', 'BURGLARY FROM VEHICLE',
       'THEFT, PERSON', 'BURGLARY, ATTEMPTED',
       'SHOPLIFTING-GRAND THEFT ($950.01 & OVER)', 'PURSE SNATCHING',
       'DRUNK ROLL', 'GRAND THEFT / AUTO REPAIR',
       'GRAND THEFT / INSURANCE FRAUD',
       'DISHONEST EMPLOYEE - GRAND THEFT'], dtype=object)

In [44]:
crimes4 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 400) & (dates_cleaned["crime_id"] < 500) ]

In [45]:
crimes4['crime_id'].unique()

array([442, 440, 420, 437, 438, 480, 434, 441, 410, 436, 487, 439, 433,
       443, 474, 421, 450, 452, 471, 470, 473, 432, 435, 444, 445, 451,
       453], dtype=int64)

In [46]:
crimes4['crime_desc'].unique()

array(['SHOPLIFTING - PETTY THEFT ($950 & UNDER)',
       'THEFT PLAIN - PETTY ($950 & UNDER)',
       'THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)',
       'RESISTING ARREST', 'RECKLESS DRIVING', 'BIKE - STOLEN',
       'FALSE IMPRISONMENT', 'THEFT PLAIN - ATTEMPT',
       'BURGLARY FROM VEHICLE, ATTEMPTED', 'LYNCHING - ATTEMPTED',
       'BOAT - STOLEN', 'FALSE POLICE REPORT',
       'DRIVING WITHOUT OWNER CONSENT (DWOC)', 'SHOPLIFTING - ATTEMPT',
       'THEFT, COIN MACHINE - PETTY ($950 & UNDER)',
       'THEFT FROM MOTOR VEHICLE - ATTEMPT',
       'THEFT FROM PERSON - ATTEMPT', 'PICKPOCKET, ATTEMPT',
       'TILL TAP - PETTY ($950 & UNDER)',
       'TILL TAP - GRAND THEFT ($950.01 & OVER)',
       'THEFT, COIN MACHINE - GRAND ($950.01 & OVER)',
       'BLOCKING DOOR INDUCTION CENTER', 'LYNCHING',
       'DISHONEST EMPLOYEE - PETTY THEFT',
       'DISHONEST EMPLOYEE ATTEMPTED THEFT', 'PURSE SNATCHING - ATTEMPT',
       'DRUNK ROLL - ATTEMPT'], dtype=object)

In [47]:
crimes5 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 500) & (dates_cleaned["crime_id"] < 600) ]

In [48]:
crimes5['crime_id'].unique()

array([510, 520, 522], dtype=int64)

In [49]:
crimes5['crime_desc'].unique()

array(['VEHICLE - STOLEN', 'VEHICLE - ATTEMPT STOLEN',
       'VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC)'],
      dtype=object)

In [50]:
crimes6 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 600) & (dates_cleaned["crime_id"] < 700) ]

In [51]:
crimes6['crime_id'].unique()

array([626, 625, 624, 648, 666, 662, 668, 670, 627, 649, 664, 647, 623,
       622, 661, 660, 654, 651, 653, 652], dtype=int64)

In [52]:
crimes6['crime_desc'].unique()

array(['INTIMATE PARTNER - SIMPLE ASSAULT', 'OTHER ASSAULT',
       'BATTERY - SIMPLE ASSAULT', 'ARSON', 'BUNCO, ATTEMPT',
       'BUNCO, GRAND THEFT', 'EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)',
       'EMBEZZLEMENT, PETTY THEFT ($950 & UNDER)',
       'CHILD ABUSE (PHYSICAL) - SIMPLE ASSAULT',
       'DOCUMENT FORGERY / STOLEN FELONY', 'BUNCO, PETTY THEFT',
       'THROWING OBJECT AT MOVING VEHICLE', 'BATTERY POLICE (SIMPLE)',
       'BATTERY ON A FIREFIGHTER', 'UNAUTHORIZED COMPUTER ACCESS',
       'COUNTERFEIT', 'CREDIT CARDS, FRAUD USE ($950 & UNDER',
       'DOCUMENT WORTHLESS ($200.01 & OVER)',
       'CREDIT CARDS, FRAUD USE ($950.01 & OVER)',
       'DOCUMENT WORTHLESS ($200 & UNDER)'], dtype=object)

In [53]:
crimes7 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 700) & (dates_cleaned["crime_id"] < 800) ]

In [54]:
crimes7['crime_id'].unique()

array([761, 740, 745, 753, 763, 760, 755, 762, 756], dtype=int64)

In [55]:
crimes7['crime_desc'].unique()

array(['BRANDISH WEAPON',
       'VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)',
       'VANDALISM - MISDEAMEANOR ($399 OR UNDER)',
       'DISCHARGE FIREARMS/SHOTS FIRED', 'STALKING',
       'LEWD/LASCIVIOUS ACTS WITH CHILD', 'BOMB SCARE', 'LEWD CONDUCT',
       'WEAPONS POSSESSION/BOMBING'], dtype=object)

In [56]:
crimes8 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 800) & (dates_cleaned["crime_id"] < 900) ]

In [57]:
crimes8['crime_id'].unique()

array([888, 820, 860, 806, 886, 813, 845, 850, 822, 810, 812, 890, 815,
       821, 814, 882, 870, 830, 805, 880, 840, 884, 865], dtype=int64)

In [58]:
crimes8['crime_desc'].unique()

array(['TRESPASSING', 'ORAL COPULATION', 'BATTERY WITH SEXUAL CONTACT',
       'PANDERING', 'DISTURBING THE PEACE',
       'CHILD ANNOYING (17YRS & UNDER)',
       'SEX OFFENDER REGISTRANT OUT OF COMPLIANCE', 'INDECENT EXPOSURE',
       'HUMAN TRAFFICKING - COMMERCIAL SEX ACTS',
       'SEX,UNLAWFUL(INC MUTUAL CONSENT, PENETRATION W/ FRGN OBJ',
       'CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)',
       'FAILURE TO YIELD', 'SEXUAL PENETRATION W/FOREIGN OBJECT',
       'SODOMY/SEXUAL CONTACT B/W PENIS OF ONE PERS TO ANUS OTH',
       'CHILD PORNOGRAPHY', 'INCITING A RIOT', 'CHILD ABANDONMENT',
       'INCEST (SEXUAL ACTS BETWEEN BLOOD RELATIVES)', 'PIMPING',
       'DISRUPT SCHOOL',
       'BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM',
       'FAILURE TO DISPERSE', 'DRUGS, TO A MINOR'], dtype=object)

In [59]:
crimes9 = dates_cleaned.loc[(dates_cleaned["crime_id"] >= 900) & (dates_cleaned["crime_id"] < 1000) ]

In [60]:
crimes9['crime_id'].unique()

array([901, 930, 956, 940, 900, 946, 903, 933, 910, 921, 951, 949, 922,
       902, 928, 932, 943, 920, 944, 948, 950, 954, 942, 931, 924, 926],
      dtype=int64)

In [61]:
crimes9['crime_desc'].unique()

array(['VIOLATION OF RESTRAINING ORDER',
       'CRIMINAL THREATS - NO WEAPON DISPLAYED',
       'LETTERS, LEWD  -  TELEPHONE CALLS, LEWD', 'EXTORTION',
       'VIOLATION OF COURT ORDER', 'OTHER MISCELLANEOUS CRIME',
       'CONTEMPT OF COURT', 'PROWLER', 'KIDNAPPING',
       'HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE',
       'DEFRAUDING INNKEEPER/THEFT OF SERVICES, $950 & UNDER',
       'ILLEGAL DUMPING', 'CHILD STEALING',
       'VIOLATION OF TEMPORARY RESTRAINING ORDER',
       'THREATENING PHONE CALLS/LETTERS', 'PEEPING TOM',
       'CRUELTY TO ANIMALS', 'KIDNAPPING - GRAND ATTEMPT', 'CONSPIRACY',
       'BIGAMY', 'DEFRAUDING INNKEEPER/THEFT OF SERVICES, OVER $950.01',
       'CONTRIBUTING', 'BRIBERY',
       'REPLICA FIREARMS(SALE,DISPLAY,MANUFACTURE OR DISTRIBUTE)',
       'TELEPHONE PROPERTY - DAMAGE', 'TRAIN WRECKING'], dtype=object)

In [62]:
division_names = dates_cleaned[['div_name', 'division']].drop_duplicates()
division_names

Unnamed: 0,div_name,division
644787,Hollywood,6
644788,Central,1
644789,N Hollywood,15
644792,Topanga,21
644794,Harbor,5
644797,Olympic,20
644799,77th Street,12
644800,Mission,19
644802,Rampart,2
644804,West LA,8


In [63]:
# Export categories_df and subcategories_df as CSV files.
division_names.to_csv("divisions.csv", index=False)