In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
#Loads crime data from LA and displays it
crime = pd.read_csv('la_crime.csv')
crime

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,AA,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,IC,Invest Cont,330.0,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,IC,Invest Cont,480.0,,,,1400 W 37TH ST,,34.0210,-118.3002
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,IC,Invest Cont,343.0,,,,14000 RIVERSIDE DR,,34.1576,-118.4387
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,IC,Invest Cont,354.0,,,,1900 TRANSIENT,,34.0944,-118.3277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
990288,240710284,07/24/2024 12:00:00 AM,07/23/2024 12:00:00 AM,1400,7,Wilshire,788,1,510,VEHICLE - STOLEN,...,IC,Invest Cont,510.0,,,,4000 W 23RD ST,,34.0362,-118.3284
990289,240104953,01/15/2024 12:00:00 AM,01/15/2024 12:00:00 AM,100,1,Central,101,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,IC,Invest Cont,745.0,,,,1300 W SUNSET BL,,34.0685,-118.2460
990290,240410786,10/14/2024 12:00:00 AM,10/11/2024 12:00:00 AM,2330,4,Hollenbeck,421,1,341,"THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LI...",...,IC,Invest Cont,341.0,,,,1700 ALBION ST,,34.0675,-118.2240
990291,240309674,04/24/2024 12:00:00 AM,04/24/2024 12:00:00 AM,1500,3,Southwest,358,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,IC,Invest Cont,230.0,,,,FLOWER ST,JEFFERSON BL,34.0215,-118.2868


In [3]:
#Filters columns in dataframe to ones relevant to our project
crime_cf = crime.get(['DATE OCC', 'AREA', 'AREA NAME','Crm Cd', 'Status Desc', 'LOCATION', 'LAT', 'LON'])
crime_cf

Unnamed: 0,DATE OCC,AREA,AREA NAME,Crm Cd,Status Desc,LOCATION,LAT,LON
0,03/01/2020 12:00:00 AM,7,Wilshire,510,Adult Arrest,1900 S LONGWOOD AV,34.0375,-118.3506
1,02/08/2020 12:00:00 AM,1,Central,330,Invest Cont,1000 S FLOWER ST,34.0444,-118.2628
2,11/04/2020 12:00:00 AM,3,Southwest,480,Invest Cont,1400 W 37TH ST,34.0210,-118.3002
3,03/10/2020 12:00:00 AM,9,Van Nuys,343,Invest Cont,14000 RIVERSIDE DR,34.1576,-118.4387
4,08/17/2020 12:00:00 AM,6,Hollywood,354,Invest Cont,1900 TRANSIENT,34.0944,-118.3277
...,...,...,...,...,...,...,...,...
990288,07/23/2024 12:00:00 AM,7,Wilshire,510,Invest Cont,4000 W 23RD ST,34.0362,-118.3284
990289,01/15/2024 12:00:00 AM,1,Central,745,Invest Cont,1300 W SUNSET BL,34.0685,-118.2460
990290,10/11/2024 12:00:00 AM,4,Hollenbeck,341,Invest Cont,1700 ALBION ST,34.0675,-118.2240
990291,04/24/2024 12:00:00 AM,3,Southwest,230,Invest Cont,FLOWER ST,34.0215,-118.2868


In [4]:
#Filters rows to only inlcude crimes that are relevant to guardia, using crime codes
codes = [
    187,    # Murder
    207,    # Kidnapping
    211,    # Robbery
    215,    # Carjacking
    240,    # Assault
    242,    # Battery
    245,    # Assault with a Deadly Weapon
    261,    # Rape
    288,    # Lewd Acts with a Child
    314,    # Indecent Exposure
    368,    # Elder or Dependent Adult Abuse
    415,    # Disturbing the Peace
    417,    # Brandishing a Weapon
    422,    # Criminal Threats
    451,    # Arson
    459,    # Burglary
    470,    # Forgery
    484,    # Theft
    487,    # Grand Theft
    488,    # Petty Theft
    496,    # Receiving Stolen Property
    537,    # Defrauding an Innkeeper
    594,    # Vandalism
    602,    # Trespassing
    647,    # Disorderly Conduct
    664,    # Attempted Crimes
    11350,  # Possession of Controlled Substances
    10851   # Vehicle Theft
]

codes = np.array(codes)
crime_rf = crime_cf[crime_cf['Crm Cd'].isin(codes)]
crime_rf.sort_values('Crm Cd')

Unnamed: 0,DATE OCC,AREA,AREA NAME,Crm Cd,Status Desc,LOCATION,LAT,LON
494433,06/12/2022 12:00:00 AM,15,N Hollywood,451,Invest Cont,GAULT ST,34.1994,-118.4116
616971,02/17/2022 12:00:00 AM,13,Newton,451,Invest Cont,AVALON BL,34.0153,-118.2653
161974,06/16/2020 12:00:00 AM,12,77th Street,451,Juv Other,5700 S VERMONT AV,33.9905,-118.2915
717627,07/08/2023 12:00:00 AM,13,Newton,451,Invest Cont,AVALON BL,34.0102,-118.2653
622860,12/24/2022 12:00:00 AM,3,Southwest,451,Invest Cont,1400 W ADAMS BL,34.0349,-118.2959
...,...,...,...,...,...,...,...,...
327437,12/22/2021 12:00:00 AM,16,Foothill,664,Invest Cont,6500 FOOTHILL BL,34.2519,-118.2713
327486,01/20/2021 12:00:00 AM,6,Hollywood,664,Invest Cont,7700 HOLLYWOOD BL,34.1017,-118.3574
327778,07/05/2021 12:00:00 AM,18,Southeast,664,Invest Cont,8600 S CENTRAL AV,33.9595,-118.2564
325294,07/06/2021 12:00:00 AM,14,Pacific,664,Invest Cont,8800 S SEPULVEDA BL,33.9579,-118.4020


In [5]:
crime = pd.read_csv('sd_crime.csv')
crime

Unnamed: 0,Incident UID,Incident Date,CIBRS Unique Offense ID,Crime Against Category,CIBRS Offense Code,CIBRS Offense Description,Victim UID,Victim Category,Victim Age,Victim Race,...,CIBRS Status Date,Census Tract,BCS Area,2022 Census Tract Population,2022 Zip Code Population,"ZIP Code Crime Rate Per 1,000 People",CT_Centroid,ZIP_Centroid,Unique Victims,Victim Age Bins
0,452623,01/07/2021 12:01:00 AM,452623-26B,Property,26B,Credit Card/Automatic Teller Fraud,,,,,...,2021-12-30 21:45:25.000,,OUT OF COUNTY,,,,,,1,
1,1083256,01/09/2021 07:30:00 AM,1083256-35A,Society,35A,Drug/Narcotic Violations,,,,,...,2022-06-08 17:33:54.000,,,,,,,,1,
2,435409,01/13/2021 03:00:00 PM,435409-23H,Property,23H,All Other Larceny,,,,,...,2021-12-30 17:23:07.999,,,,34003.0,0.029,,POINT (-117.16293543470877 32.745320835953024),1,
3,736456,01/17/2021 05:05:00 PM,736456-13A-758591,People,13A,Aggravated Assault,758591.0,I,34.0,HISPANIC,...,2021-12-31 11:07:41.000,,DETENTION FACILITY,,,,,,1,g) 30-34
4,1249353,01/28/2021 09:31:00 PM,1249353-35A,Society,35A,Drug/Narcotic Violations,,,,,...,2022-07-19 10:10:25.999,,,,33191.0,0.030,,POINT (-117.39278383065854 33.35683989585762),1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
510584,2333674,09/29/2024 07:00:00 PM,2333674-13B-2046757,People,13B,Simple Assault,2046757.0,I,55.0,BLACK,...,2024-09-30 16:18:31.000,4.00,,4270.0,34003.0,0.029,POINT (-117.16294602963433 32.75323463050032),POINT (-117.16293543470877 32.745320835953024),1,l) 55-59
510585,2334461,09/28/2024 12:00:01 AM,2334461-13A-2047403,People,13A,Aggravated Assault,2047403.0,I,37.0,HISPANIC,...,2024-10-01 10:14:40.999,76.02,,1781.0,45561.0,0.022,POINT (-117.25011244725232 32.76662199090993),POINT (-117.23328805118283 32.79627711318137),1,h) 35-39
510586,2337500,09/30/2024 03:10:00 PM,2337500-120,Property,120,Robbery,,,,,...,2024-10-03 10:22:24.999,105.02,CITY OF IMPERIAL BEACH,5177.0,26275.0,0.038,POINT (-117.11291629315383 32.5860203137456),POINT (-117.11711989601122 32.57109719065306),0,
510587,2331943,09/27/2024 03:20:00 PM,2331943-35A,Society,35A,Drug/Narcotic Violations,,,,,...,2024-10-03 11:20:05.999,51.02,,4314.0,51482.0,0.019,POINT (-117.15400405344477 32.70904613738133),POINT (-117.1798054992675 32.71654211170926),1,


In [6]:
crime = crime[crime['Crime Against Category'] == 'People']
crime

Unnamed: 0,Incident UID,Incident Date,CIBRS Unique Offense ID,Crime Against Category,CIBRS Offense Code,CIBRS Offense Description,Victim UID,Victim Category,Victim Age,Victim Race,...,CIBRS Status Date,Census Tract,BCS Area,2022 Census Tract Population,2022 Zip Code Population,"ZIP Code Crime Rate Per 1,000 People",CT_Centroid,ZIP_Centroid,Unique Victims,Victim Age Bins
3,736456,01/17/2021 05:05:00 PM,736456-13A-758591,People,13A,Aggravated Assault,758591.0,I,34.0,HISPANIC,...,2021-12-31 11:07:41.000,,DETENTION FACILITY,,,,,,1,g) 30-34
20,640887,02/16/2021 09:00:00 PM,640887-13B-657267,People,13B,Simple Assault,657267.0,I,44.0,HISPANIC,...,2022-01-01 04:47:39.000,,DETENTION FACILITY,,,,,,1,i) 40-44
23,661987,02/18/2021 05:40:45 PM,661987-13A-679864,People,13A,Aggravated Assault,679864.0,I,22.0,HISPANIC,...,2022-01-01 07:29:38.000,,,,37960.0,0.026,,POINT (-117.25744919022421 32.84663254858464),1,e) 20-24
27,657630,02/25/2021 01:00:00 AM,657630-13B-675108,People,13B,Simple Assault,675108.0,I,38.0,WHITE,...,2022-01-01 07:15:05.999,,DETENTION FACILITY,,,,,,1,h) 35-39
28,657631,02/25/2021 05:56:00 PM,657631-13B-675110,People,13B,Simple Assault,675110.0,I,33.0,HISPANIC,...,2022-01-01 07:15:12.999,,DETENTION FACILITY,,,,,,1,g) 30-34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
510573,2332703,09/30/2024 12:56:10 AM,2332703-13B-2045943,People,13B,Simple Assault,2045943.0,I,59.0,HISPANIC,...,2024-09-30 04:07:28.999,48.00,,3109.0,48645.0,0.021,POINT (-117.13814231177453 32.70845460584856),POINT (-117.12067829999555 32.693011151692495),1,l) 55-59
510574,2332428,09/29/2024 01:44:00 AM,2332428-13A-2045641,People,13A,Aggravated Assault,2045641.0,I,,,...,2024-09-29 10:09:53.000,100.01,,4455.0,84897.0,0.012,POINT (-117.0554431101726 32.58681509425518),POINT (-116.97504946156067 32.57385941287214),1,
510576,2341549,09/29/2024 11:19:00 PM,2341549-13A-2053249,People,13A,Aggravated Assault,2053249.0,I,20.0,HISPANIC,...,2024-10-07 16:17:52.999,191.05,VALLEY CENTER,7186.0,19513.0,0.051,POINT (-117.09667669439708 33.23059021186462),POINT (-117.00267067696717 33.26358696243767),1,e) 20-24
510584,2333674,09/29/2024 07:00:00 PM,2333674-13B-2046757,People,13B,Simple Assault,2046757.0,I,55.0,BLACK,...,2024-09-30 16:18:31.000,4.00,,4270.0,34003.0,0.029,POINT (-117.16294602963433 32.75323463050032),POINT (-117.16293543470877 32.745320835953024),1,l) 55-59


In [7]:
crime.columns

Index(['Incident UID', 'Incident Date', 'CIBRS Unique Offense ID',
       'Crime Against Category', 'CIBRS Offense Code',
       'CIBRS Offense Description', 'Victim UID', 'Victim Category',
       'Victim Age', 'Victim Race', 'Victim Sex', 'Stolen Vehicles', 'Beat',
       'City', 'Zip Code', 'Domestic Violence Incident', 'Agency',
       'Unique Incidents', 'Unique Offenses', 'CIBRS Status',
       'CIBRS Status Date', 'Census Tract', 'BCS Area',
       '2022 Census Tract Population', '2022 Zip Code Population',
       'ZIP Code Crime Rate Per 1,000 People', 'CT_Centroid', 'ZIP_Centroid',
       'Unique Victims', 'Victim Age Bins'],
      dtype='object')

In [8]:
crime = crime[['Incident UID', 'Incident Date', 'Crime Against Category', 'Zip Code', 'ZIP_Centroid']]
crime

Unnamed: 0,Incident UID,Incident Date,Crime Against Category,Zip Code,ZIP_Centroid
3,736456,01/17/2021 05:05:00 PM,People,,
20,640887,02/16/2021 09:00:00 PM,People,,
23,661987,02/18/2021 05:40:45 PM,People,92037.0,POINT (-117.25744919022421 32.84663254858464)
27,657630,02/25/2021 01:00:00 AM,People,,
28,657631,02/25/2021 05:56:00 PM,People,,
...,...,...,...,...,...
510573,2332703,09/30/2024 12:56:10 AM,People,92113.0,POINT (-117.12067829999555 32.693011151692495)
510574,2332428,09/29/2024 01:44:00 AM,People,92154.0,POINT (-116.97504946156067 32.57385941287214)
510576,2341549,09/29/2024 11:19:00 PM,People,92082.0,POINT (-117.00267067696717 33.26358696243767)
510584,2333674,09/29/2024 07:00:00 PM,People,92103.0,POINT (-117.16293543470877 32.745320835953024)


In [9]:
crime = crime.dropna(subset = ['Zip Code'])
crime

Unnamed: 0,Incident UID,Incident Date,Crime Against Category,Zip Code,ZIP_Centroid
23,661987,02/18/2021 05:40:45 PM,People,92037.0,POINT (-117.25744919022421 32.84663254858464)
73,721771,05/04/2021 06:54:00 PM,People,92011.0,POINT (-117.29545435808339 33.10813455386477)
96,941769,06/05/2021 09:20:00 AM,People,92020.0,POINT (-116.96612641411997 32.794739363541865)
147,976822,08/04/2021 03:22:00 PM,People,92109.0,POINT (-117.23328805118283 32.79627711318137)
194,2163563,10/28/2021 11:20:00 AM,People,92101.0,POINT (-117.1798054992675 32.71654211170926)
...,...,...,...,...,...
510573,2332703,09/30/2024 12:56:10 AM,People,92113.0,POINT (-117.12067829999555 32.693011151692495)
510574,2332428,09/29/2024 01:44:00 AM,People,92154.0,POINT (-116.97504946156067 32.57385941287214)
510576,2341549,09/29/2024 11:19:00 PM,People,92082.0,POINT (-117.00267067696717 33.26358696243767)
510584,2333674,09/29/2024 07:00:00 PM,People,92103.0,POINT (-117.16293543470877 32.745320835953024)


In [37]:
grouped_crime = crime.groupby('Zip Code').count().sort_values('Incident UID', ascending = False)
grouped_crime = grouped_crime.get('Incident UID')
grouped_crime = grouped_crime.assign()
grouped_crime

Zip Code
92101.0    10885
91910.0     3711
92113.0     3602
92054.0     3490
91950.0     3353
           ...  
92134.0        5
91948.0        5
92155.0        4
92118.0        2
92096.0        1
Name: Incident UID, Length: 112, dtype: int64

KeyError: 'zipcode_column_in_geojson'

In [36]:
# Assuming your Series is named crime_series
crime_series = crime_series.rename_axis('Zip Code').reset_index(name='Crime Count')
crime_series.to_json("crime_data.json", orient="records")
