# Import Libraries

In [1]:
import csv
import datetime
import json
import pandas as pd
import zipfile

# Import Crime Data

Open the crime data from its ZIP archive and load it into a pandas DataFrame.

In [2]:
# Location of the zipped crime CSV (absolute path on the author's machine).
crime_zip_path = "C:/Users/Barbieri/DSC-Program/DC-Criminalistics/dc-crime-data/dc-crime-data.csv.zip"

# Open the archive read-only, then get a binary file handle on the CSV member.
# The handle is left open because pd.read_csv consumes it in a later cell.
crime_zip = zipfile.ZipFile(crime_zip_path, mode='r')
crime_csv = crime_zip.open('dc-crime-data.csv')

In [3]:
# Parse the CSV member opened from the ZIP archive into a DataFrame.
crime_df = pd.read_csv(crime_csv)

In [4]:
# Preview the first rows of the crime data.
crime_df.head()

Unnamed: 0,NEIGHBORHOOD_CLUSTER,CENSUS_TRACT,offensegroup,LONGITUDE,END_DATE,offense-text,SHIFT,YBLOCK,DISTRICT,WARD,...,BLOCK,START_DATE,CCN,OFFENSE,OCTO_RECORD_ID,ANC,REPORT_DAT,METHOD,location,LATITUDE
0,cluster 5,5500.0,property,-77.052353,2009-03-30T15:50:00.000,theft/other,evening,137490.0,2.0,2.0,...,2400 - 2499 block of m street nw,2009-03-30T05:30:00.000,9041857,theft/other,09041857-01,2A,2009-03-30T20:40:00.000Z,others,"38.905263228683658,-77.052355683742746",38.905255
1,cluster 4,202.0,property,-77.062857,2009-03-30T16:30:00.000,theft/other,evening,137621.0,2.0,2.0,...,1224 - 1299 block of wisconsin avenue nw,2009-03-30T16:20:00.000,9041858,theft/other,09041858-01,2E,2009-03-30T20:34:00.000Z,others,"38.906438143434897,-77.062859682010298",38.90643
2,cluster 17,1804.0,violent,-77.027957,2009-03-30T00:00:00.000,robbery,evening,143785.0,4.0,4.0,...,5900 - 5999 block of georgia avenue nw,2009-03-30T17:00:00.000,9041900,robbery,09041900-01,4A,2009-03-30T21:42:00.000Z,others,"38.961978891670213,-77.027959395242561",38.961971
3,cluster 14,1002.0,property,-77.07682,2009-03-30T07:30:00.000,motor vehicle theft,evening,141241.0,2.0,3.0,...,3810 - 3899 block of rodman street nw,2009-03-29T07:00:00.000,9041916,motor vehicle theft,09041916-01,3C,2009-03-30T22:52:00.000Z,others,"38.939039936788873,-77.076822116815663",38.939032
4,cluster 23,8904.0,property,-76.979941,2009-03-30T20:30:00.000,theft/other,evening,137077.0,5.0,5.0,...,1600 - 1699 block of maryland avenue ne,2009-03-30T18:40:00.000,9041927,theft/other,09041927-01,5D,2009-03-30T22:40:00.000Z,others,"38.901552788657312,-76.979942787227358",38.901545


# Import Weather Data

Import ZIP files and convert weather items to a list of dictionaries.

In [5]:
# Read the weather JSON out of its ZIP archive. The context managers close both
# the archive and the member handle once the JSON has been parsed; the names
# `weather_zip` and `weather_json` stay bound afterwards but refer to closed
# objects.
with zipfile.ZipFile("C:/Users/Barbieri/DSC-Program/DC-Criminalistics/weather-data/crime-weather-data.zip",
                     mode='r') as weather_zip:
    with weather_zip.open('crime-weather-data.json') as weather_json:
        # Later cells iterate over this object and treat each element as a dict.
        weather_dict = json.load(weather_json)

Move items stored in "currently" key up one level. Delete the "currently" key afterwards.

In [6]:
# Flatten each weather record in place: lift the fields nested under
# "currently" to the top level of the record, then drop the wrapper key.
for w_dict in weather_dict:
    # Records without a "currently" entry are left untouched.
    if 'currently' in w_dict:
        # Nested values overwrite any top-level key with the same name.
        w_dict.update(w_dict['currently'])
        del w_dict['currently']

Load as Pandas Dataframe.

In [7]:
# Build a DataFrame from the weather records, one row per dictionary.
weather_df = pd.DataFrame(weather_dict)

In [8]:
# Preview the first rows of the weather data.
weather_df.head()

Unnamed: 0,apparentTemperature,cloudCover,code,dewPoint,error,humidity,icon,latitude,longitude,precipIntensity,...,precipType,pressure,summary,temperature,time,uvIndex,visibility,windBearing,windGust,windSpeed
0,43.43,1.0,,30.6,,0.54,cloudy,38.905255,-77.052353,0.0,...,,1005.88,Overcast,46.54,1238405000.0,0.0,6.28,294.0,15.25,6.22
1,57.1,0.17,,29.94,,0.35,clear-day,38.90643,-77.062857,0.0,...,,1012.76,Clear,57.1,1238444000.0,3.0,5.15,311.0,19.51,8.43
2,56.44,0.12,,30.33,,0.37,clear-day,38.961971,-77.027957,0.0,...,,1012.9,Clear,56.44,1238447000.0,2.0,4.79,308.0,18.43,8.11
3,47.9,1.0,,47.4,,0.98,fog,38.939032,-77.07682,0.0019,...,rain,999.52,Foggy,47.9,1238324000.0,0.0,1.18,65.0,2.65,0.72
4,55.91,0.06,,30.21,,0.37,clear-day,38.901545,-76.979941,0.0,...,,1014.33,Clear,55.91,1238453000.0,0.0,4.71,313.0,16.67,7.44


# Merge Crime and Weather Data

### Compare Dataframes

Notice that there are 1014 more instances in the crime data than in the weather data. This is likely due to (1) a change in the underlying crime data used for this analysis, (2) a data connection error while compiling from the Dark Sky API, or (3) no data being available from the Dark Sky API.

In [9]:
# Row counts of the two frames.
crime_df_len = crime_df.shape[0]
weather_df_len = weather_df.shape[0]

# Prints: weather rows, crime rows, and their difference (weather minus crime).
row_gap = weather_df_len - crime_df_len
print(weather_df_len, crime_df_len, row_gap)

380053 381067 -1014


### Clean Weather Data

Remove rows whose time value is not positive (negative, zero, or missing).

In [10]:
# Keep only rows with a strictly positive epoch timestamp; the comparison is
# also False for zero and for missing (NaN) values, so those rows are dropped too.
# `.copy()` makes the result an independent frame, so adding columns to it
# later does not raise pandas' SettingWithCopyWarning.
weather_df = weather_df[weather_df['time'] > 0].copy()

Convert the time values to the timestamp format used by the crime data.

In [11]:
def converttime(row, tz=None):
    """Format a Unix timestamp as ``YYYY-MM-DDTHH:MM:SS.000``.

    Parameters
    ----------
    row : int or float
        Seconds since the Unix epoch.
    tz : datetime.tzinfo, optional
        Time zone in which to express the timestamp. The default, ``None``,
        uses the local time zone of the machine running the code, so the
        resulting string differs between machines in different zones. Pass
        an explicit zone for a reproducible result.

    Returns
    -------
    str
        The timestamp rendered with second precision; the millisecond part
        is always the literal ``.000``.
    """
    moment = datetime.datetime.fromtimestamp(row, tz)

    # Any fractional second in `row` is discarded by this format string.
    return moment.strftime('%Y-%m-%dT%H:%M:%S.000')

In [12]:
# Add a string column `c_time`: each epoch `time` value formatted by
# converttime. It is used as a join key against the crime data's START_DATE.
weather_df['c_time'] = weather_df['time'].apply(converttime)

### Merge Crime and Weather

In [13]:
# Columns that identify the same observation on each side of the join.
crime_keys = ['LATITUDE', 'LONGITUDE', 'START_DATE']
weather_keys = ['latitude', 'longitude', 'c_time']

# A left join emits one output row per matching right-hand row, so weather rows
# that share the same (latitude, longitude, c_time) would repeat the matching
# crime rows. Keep a single weather row per key so that each crime row appears
# exactly once in the result; crimes without a weather match keep NaN values.
crime_weather_merge = crime_df.merge(weather_df.drop_duplicates(subset=weather_keys),
                                     how='left',
                                     left_on=crime_keys,
                                     right_on=weather_keys)

In [14]:
# Preview the first rows of the merged crime and weather data.
crime_weather_merge.head()

Unnamed: 0,NEIGHBORHOOD_CLUSTER,CENSUS_TRACT,offensegroup,LONGITUDE,END_DATE,offense-text,SHIFT,YBLOCK,DISTRICT,WARD,...,pressure,summary,temperature,time,uvIndex,visibility,windBearing,windGust,windSpeed,c_time
0,cluster 5,5500.0,property,-77.052353,2009-03-30T15:50:00.000,theft/other,evening,137490.0,2.0,2.0,...,1005.88,Overcast,46.54,1238405000.0,0.0,6.28,294.0,15.25,6.22,2009-03-30T05:30:00.000
1,cluster 4,202.0,property,-77.062857,2009-03-30T16:30:00.000,theft/other,evening,137621.0,2.0,2.0,...,1012.76,Clear,57.1,1238444000.0,3.0,5.15,311.0,19.51,8.43,2009-03-30T16:20:00.000
2,cluster 17,1804.0,violent,-77.027957,2009-03-30T00:00:00.000,robbery,evening,143785.0,4.0,4.0,...,1012.9,Clear,56.44,1238447000.0,2.0,4.79,308.0,18.43,8.11,2009-03-30T17:00:00.000
3,cluster 14,1002.0,property,-77.07682,2009-03-30T07:30:00.000,motor vehicle theft,evening,141241.0,2.0,3.0,...,999.52,Foggy,47.9,1238324000.0,0.0,1.18,65.0,2.65,0.72,2009-03-29T07:00:00.000
4,cluster 23,8904.0,property,-76.979941,2009-03-30T20:30:00.000,theft/other,evening,137077.0,5.0,5.0,...,1014.33,Clear,55.91,1238453000.0,0.0,4.71,313.0,16.67,7.44,2009-03-30T18:40:00.000
