In [13]:
import pandas as pd
from datetime import datetime
import jovian_project.data_cleaning as dc

In [14]:
# Read the JSON endpoint (not a CSV) into a DataFrame.
# NOTE(review): open-data endpoints like this one may cap the number of rows
# returned per request — confirm that crime_df holds the full dataset.
url = 'https://data.lacity.org/resource/2nrs-mtv8.json'
crime_df = pd.read_json(url)

In [15]:
# Pass the 'time_occ' column through dc.clean_military_time via dc.edit_column.
# NOTE(review): the return value is discarded, so edit_column presumably
# updates crime_df in place — confirm against jovian_project.data_cleaning.
dc.edit_column(crime_df, 'time_occ', dc.clean_military_time)

In [25]:
# Work on an independent copy so the loaded frame stays available unchanged
# by the cleaning steps that follow.
cleaned_crime_df = crime_df.copy(deep=True)

In [26]:
# Parse the 'HH:MM' strings and keep only the time-of-day part
# (datetime.time objects).
cleaned_crime_df['time_occ'] = (
    pd.to_datetime(cleaned_crime_df['time_occ'], format='%H:%M')
    .dt.time
)

In [27]:
# Remove the weapon-used code, status and crime-code-1 columns, which this
# analysis treats as redundant.
cleaned_crime_df.drop(
    ['weapon_used_cd', 'status', 'crm_cd_1'],
    axis='columns',
    inplace=True,
)


In [28]:
# Fill missing values with the dataset's own "unknown" markers:
#   weapon_desc  -> 'UNKNOWN WEAPON/OTHER WEAPON' (a value already present in the column)
#   vict_sex     -> 'X' (represents unknown)
#   vict_descent -> 'Unknown'
# Each result is assigned back to its column. Calling fillna(..., inplace=True)
# on the Series returned by cleaned_crime_df[col] is chained assignment: pandas 2.2+
# warns about it, and under Copy-on-Write it no longer updates the DataFrame.
cleaned_crime_df['weapon_desc'] = cleaned_crime_df['weapon_desc'].fillna('UNKNOWN WEAPON/OTHER WEAPON')
cleaned_crime_df['vict_sex'] = cleaned_crime_df['vict_sex'].fillna('X')
cleaned_crime_df['vict_descent'] = cleaned_crime_df['vict_descent'].fillna('Unknown')


In [34]:
# Victim descent is stored as a single-letter code, which is not very
# intuitive, so replace each code with its full description.
# Mapping of descent code -> descent description.
descent_dict = {
    'A': 'Other Asian',
    'B': 'Black',
    'C': 'Chinese',
    'D': 'Cambodian',
    'F': 'Filipino',
    'G': 'Guamanian',
    'H': 'Hispanic/Latin/Mexican',
    'I': 'American Indian/Alaskan Native',
    'J': 'Japanese',
    'K': 'Korean',
    'L': 'Laotian',
    'O': 'Other',
    'P': 'Pacific Islander',
    'S': 'Samoan',
    'U': 'Hawaiian',
    'V': 'Vietnamese',
    'W': 'White',
    'X': 'Unknown',
    'Z': 'Asian Indian',
}
# Assign the result back to the column. Calling replace(..., inplace=True) on
# the Series returned by cleaned_crime_df['vict_descent'] is chained
# assignment: pandas 2.2+ warns about it, and under Copy-on-Write it no longer
# updates the DataFrame. Values that are not keys of descent_dict (such as the
# 'Unknown' fill value) are left unchanged by replace.
cleaned_crime_df['vict_descent'] = cleaned_crime_df['vict_descent'].replace(descent_dict)

In [31]:
# Parse date_occ and date_rptd from their timestamp strings and keep only the
# calendar date; the time component is dropped because it was the same for
# all rows.
for date_column in ('date_occ', 'date_rptd'):
    cleaned_crime_df[date_column] = pd.to_datetime(
        cleaned_crime_df[date_column],
        format='%Y-%m-%dT%H:%M:%S.%f',
    ).dt.date

datetime.date(2020, 1, 1)

In [36]:
# Spot-check the first 30 victim-descent values.
cleaned_crime_df['vict_descent'].head(30)

0                      Black
1     Hispanic/Latin/Mexican
2                    Unknown
3                      White
4                    Unknown
5     Hispanic/Latin/Mexican
6     Hispanic/Latin/Mexican
7                    Unknown
8                      Black
9                    Unknown
10               Other Asian
11                     Other
12               Other Asian
13                   Unknown
14    Hispanic/Latin/Mexican
15    Hispanic/Latin/Mexican
16    Hispanic/Latin/Mexican
17                     White
18                     Black
19                     Black
20                     Other
21    Hispanic/Latin/Mexican
22                     Black
23                     White
24               Other Asian
25                     Black
26               Other Asian
27                     White
28                     White
29                     Black
Name: vict_descent, dtype: object

In [32]:
# Preview the first rows of the cleaned frame.
# NOTE: the saved output below was produced at execution count 32, before the
# descent-code replacement cell (execution count 34) ran, so vict_descent still
# shows raw letter codes there. Restart the kernel and run all cells in order
# to refresh it.
cleaned_crime_df.head()

Unnamed: 0,dr_no,date_rptd,date_occ,time_occ,area,area_name,rpt_dist_no,part_1_2,crm_cd,crm_cd_desc,...,vict_descent,premis_cd,premis_desc,weapon_desc,status_desc,location,lat,lon,crm_cd_2,cross_street
0,10304468,2020-01-08,2020-01-08,22:30:00,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,...,B,501,SINGLE FAMILY DWELLING,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",Adult Other,1100 W 39TH PL,34.0141,-118.2978,,
1,190101086,2020-01-02,2020-01-01,03:30:00,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,...,H,102,SIDEWALK,UNKNOWN WEAPON/OTHER WEAPON,Invest Cont,700 S HILL ST,34.0459,-118.2545,,
2,200110444,2020-04-14,2020-02-13,12:00:00,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,...,X,726,POLICE FACILITY,UNKNOWN WEAPON/OTHER WEAPON,Adult Arrest,200 E 6TH ST,34.0448,-118.2474,,
3,191501505,2020-01-01,2020-01-01,17:30:00,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,W,502,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",UNKNOWN WEAPON/OTHER WEAPON,Invest Cont,5400 CORTEEN PL,34.1685,-118.4019,998.0,
4,191921269,2020-01-01,2020-01-01,04:15:00,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,X,409,BEAUTY SUPPLY STORE,UNKNOWN WEAPON/OTHER WEAPON,Invest Cont,14400 TITUS ST,34.2198,-118.4468,,
