# This program downloads and cleans Milwaukee crime data for the purpose of displaying it on a map. Link to data: https://data.milwaukee.gov/dataset/wibr

In [79]:
import pandas as pd
import numpy as np
import boto3 
# Direct-download URL of the WIBR crime CSV on the Milwaukee open data portal.
data_link = 'https://data.milwaukee.gov/dataset/e5feaad3-ee73-418c-b65d-ef810c199390/resource/87843297-a6fa-46d4-ba5d-cb342fb2d3bb/download/wibr.csv'
# Fetch the CSV over HTTP and load it into a DataFrame.
crime = pd.read_csv(filepath_or_buffer=data_link)
# S3 client; the cleaned CSV is uploaded with it at the end of the notebook.
s3 = boto3.client(service_name='s3')

In [80]:
# Preview the first five rows of the raw download.
crime.head(n=5)

Unnamed: 0,IncidentNum,ReportedDateTime,ReportedYear,ReportedMonth,Location,WeaponUsed,ALD,NSP,POLICE,TRACT,...,Arson,AssaultOffense,Burglary,CriminalDamage,Homicide,LockedVehicle,Robbery,SexOffense,Theft,VehicleTheft
0,191830044,2019-07-02 07:30:00,2019,7,1311 W MADISON ST,OTHER,12.0,16.0,2.0,16400.0,...,0,1,0,0,0,0,0,0,0,0
1,191830035,2019-07-02 05:40:00,2019,7,5228 W CENTER ST,,10.0,5.0,7.0,5900.0,...,0,1,0,0,0,0,0,0,0,0
2,191830032,2019-07-02 04:40:00,2019,7,3607 W MT VERNON AV,,10.0,14.0,3.0,13300.0,...,0,1,0,0,0,0,0,0,0,0
3,191830027,2019-07-02 03:22:00,2019,7,170 S 1ST ST,,12.0,,1.0,187400.0,...,0,0,1,0,0,0,0,0,0,0
4,191830016,2019-07-02 01:34:00,2019,7,3607 W MT VERNON AV,PERSONAL WEAPON,10.0,14.0,3.0,13300.0,...,0,1,0,0,0,0,0,0,0,0


In [81]:
# Check whether 'IncidentNum' can serve as a unique ID (True only if no
# value appears in more than one row).
crime['IncidentNum'].is_unique

False

In [82]:
# Show every column label of the raw data as a plain list.
print(crime.columns.tolist())

['IncidentNum', 'ReportedDateTime', 'ReportedYear', 'ReportedMonth', 'Location', 'WeaponUsed', 'ALD', 'NSP', 'POLICE', 'TRACT', 'WARD', 'ZIP', 'RoughX', 'RoughY', 'Arson', 'AssaultOffense', 'Burglary', 'CriminalDamage', 'Homicide', 'LockedVehicle', 'Robbery', 'SexOffense', 'Theft', 'VehicleTheft']


In [83]:
# Collapse the one-column-per-offense indicator flags into a single
# 'Crime Type' label per incident.
crime_flag_columns = ['Arson', 'AssaultOffense', 'Burglary', 'CriminalDamage',
                      'Homicide', 'LockedVehicle', 'Robbery', 'SexOffense',
                      'Theft', 'VehicleTheft']
crime_flags = crime[crime_flag_columns]
# idxmax returns the label of the FIRST maximum in each row. For a row with
# several flags set that means the first flagged offense (in the column order
# above) is kept. For a row where every flag is 0 it would return 'Arson',
# so rows with no flag set are masked to missing instead of being mislabeled.
has_any_flag = crime_flags.any(axis=1)
crime_type = crime_flags.idxmax(axis=1).where(has_any_flag)
just_crimes = pd.DataFrame({'Crime Type': crime_type})

In [84]:
# Columns carried over from the raw data into the cleaned output; the
# remaining raw columns (e.g. the per-offense flags) are left behind.
info_columns = ['IncidentNum', 'ReportedDateTime', 'Location', 'WeaponUsed',
                'ALD', 'POLICE', 'TRACT', 'WARD', 'ZIP']
crimes_info = crime.loc[:, info_columns]
# Attach the single 'Crime Type' column; rows are matched on the shared index.
new_crimes = crimes_info.join(just_crimes, how='left')

In [85]:
# Preview the first five rows of the reshaped data.
new_crimes.head(n=5)

Unnamed: 0,IncidentNum,ReportedDateTime,Location,WeaponUsed,ALD,POLICE,TRACT,WARD,ZIP,Crime Type
0,191830044,2019-07-02 07:30:00,1311 W MADISON ST,OTHER,12.0,2.0,16400.0,232.0,53204.0,AssaultOffense
1,191830035,2019-07-02 05:40:00,5228 W CENTER ST,,10.0,7.0,5900.0,163.0,53210.0,AssaultOffense
2,191830032,2019-07-02 04:40:00,3607 W MT VERNON AV,,10.0,3.0,13300.0,214.0,53208.0,AssaultOffense
3,191830027,2019-07-02 03:22:00,170 S 1ST ST,,12.0,1.0,187400.0,235.0,53204.0,Burglary
4,191830016,2019-07-02 01:34:00,3607 W MT VERNON AV,PERSONAL WEAPON,10.0,3.0,13300.0,214.0,53208.0,AssaultOffense


In [86]:
# Add constant geographic columns so every row carries its city and state,
# which helps with mapping later.
for column_name, constant_value in (('City', 'Milwaukee'), ('State', 'WI')):
    new_crimes[column_name] = constant_value

In [87]:
# Replace every missing value (NaN) with the literal string 'None'.
new_crimes = new_crimes.fillna(value='None')

In [88]:
# Write the cleaned data to a local CSV, then upload that file to the
# 'crime-mapping' S3 bucket under the same name.
output_filename = 'mke_crime_data.csv'
new_crimes.to_csv(output_filename)
s3.upload_file(output_filename, 'crime-mapping', output_filename)

None
