In [1]:
### Import Dependencies

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime


In [3]:
### Read data from crimecards.dc.gov, for the last year 2/1/22 - 2/5/23

In [4]:
# identify available columns
DC_crime = pd.read_csv('Resources/one_year_dc-crimes-search-results.csv')
DC_crime.columns

Index(['NEIGHBORHOOD_CLUSTER', 'offensegroup', 'CENSUS_TRACT', 'LONGITUDE',
       'END_DATE', 'offense-text', 'YBLOCK', 'DISTRICT', 'SHIFT', 'WARD',
       'YEAR', 'offensekey', 'BID', 'sector', 'PSA', 'ucr-rank', 'BLOCK_GROUP',
       'VOTING_PRECINCT', 'XBLOCK', 'BLOCK', 'START_DATE', 'CCN', 'OFFENSE',
       'OCTO_RECORD_ID', 'ANC', 'REPORT_DAT', 'METHOD', 'location',
       'LATITUDE'],
      dtype='object')

In [5]:
# convert data into a DataFram
DC_crime_data = pd.DataFrame(DC_crime)

# organize columns
DC_crime_data = DC_crime_data[['YEAR', 'START_DATE', 'offensegroup', 'OFFENSE', 'offense-text', 'offensekey']]

DC_crime_data.head()


Unnamed: 0,YEAR,START_DATE,offensegroup,OFFENSE,offense-text,offensekey
0,2022,"6/9/2017, 12:20:00 AM",violent,homicide,homicide,violent|homicide
1,2021,"2/11/2021, 3:00:52 PM",property,theft/other,theft/other,property|theft/other
2,2021,"2/11/2021, 6:03:44 PM",violent,robbery,robbery,violent|robbery
3,2021,"2/12/2021, 5:09:33 AM",violent,robbery,robbery,violent|robbery
4,2021,"2/18/2021, 4:39:52 AM",violent,robbery,robbery,violent|robbery


In [6]:
# slit the START_DATE into two columns, one for DATE and one for TIME
DC_crime_data[['START_DATE','START_TIME']] = DC_crime_data['START_DATE'].str.split(',',expand=True)

DC_crime_data.head()

Unnamed: 0,YEAR,START_DATE,offensegroup,OFFENSE,offense-text,offensekey,START_TIME
0,2022,6/9/2017,violent,homicide,homicide,violent|homicide,12:20:00 AM
1,2021,2/11/2021,property,theft/other,theft/other,property|theft/other,3:00:52 PM
2,2021,2/11/2021,violent,robbery,robbery,violent|robbery,6:03:44 PM
3,2021,2/12/2021,violent,robbery,robbery,violent|robbery,5:09:33 AM
4,2021,2/18/2021,violent,robbery,robbery,violent|robbery,4:39:52 AM


In [7]:
# drop rows with na values
DC_crime_data = DC_crime_data.dropna()

In [8]:
# convert 'START_TIME' to same format as found with weather data
DC_crime_data['START_TIME'] = pd.to_datetime(DC_crime_data['START_TIME'], format=' %I:%M:%S %p').dt.strftime('%H:%M:%S')

In [9]:
# convert 'START_DATE' to same format as found with weather data
DC_crime_data['START_DATE'] = pd.to_datetime(DC_crime_data['START_DATE'], format='%m/%d/%Y').dt.strftime('%Y/%m/%d')
DC_crime_data.head()

Unnamed: 0,YEAR,START_DATE,offensegroup,OFFENSE,offense-text,offensekey,START_TIME
0,2022,2017/06/09,violent,homicide,homicide,violent|homicide,00:20:00
1,2021,2021/02/11,property,theft/other,theft/other,property|theft/other,15:00:52
2,2021,2021/02/11,violent,robbery,robbery,violent|robbery,18:03:44
3,2021,2021/02/12,violent,robbery,robbery,violent|robbery,05:09:33
4,2021,2021/02/18,violent,robbery,robbery,violent|robbery,04:39:52


In [10]:
DC_crime_data.groupby(DC_crime_data['offensekey']).count()

Unnamed: 0_level_0,YEAR,START_DATE,offensegroup,OFFENSE,offense-text,START_TIME
offensekey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
property|arson,10,10,10,10,10,10
property|burglary,2242,2242,2242,2242,2242,2242
property|motor vehicle theft,7562,7562,7562,7562,7562,7562
property|theft f/auto,16474,16474,16474,16474,16474,16474
property|theft/other,22019,22019,22019,22019,22019,22019
violent|assault w/dangerous weapon,3037,3037,3037,3037,3037,3037
violent|homicide,421,421,421,421,421,421
violent|robbery,4139,4139,4139,4139,4139,4139
violent|sex abuse,338,338,338,338,338,338


In [11]:
#### Export clean crime data to new cvs file

In [12]:
DC_crime_data.to_csv("Resources/clean_crime_data.csv")

In [13]:
DC_crime_data = pd.read_csv("Resources/clean_crime_data.csv")
DC_crime_data.head()

Unnamed: 0.1,Unnamed: 0,YEAR,START_DATE,offensegroup,OFFENSE,offense-text,offensekey,START_TIME
0,0,2022,2017/06/09,violent,homicide,homicide,violent|homicide,00:20:00
1,1,2021,2021/02/11,property,theft/other,theft/other,property|theft/other,15:00:52
2,2,2021,2021/02/11,violent,robbery,robbery,violent|robbery,18:03:44
3,3,2021,2021/02/12,violent,robbery,robbery,violent|robbery,05:09:33
4,4,2021,2021/02/18,violent,robbery,robbery,violent|robbery,04:39:52
