In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

# EXTRACT CHICAGO CRIME DATA (CSV)

In [2]:
# Input Crime File (CSV)
crime_df = pd.read_csv('chicago_crime.csv')
crime_df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,11552657,JC100019,12/31/2018 11:30:00 PM,004XX S CLINTON ST,0313,ROBBERY,ARMED: OTHER DANGEROUS WEAPON,CTA PLATFORM,False,False,...,25.0,28.0,03,1172803.0,1898062.0,2018,01/10/2019 03:16:50 PM,41.875718,-87.640994,"(41.875717757, -87.64099364)"
1,11552595,JB574410,12/31/2018 11:27:00 PM,041XX W 79TH ST,051A,ASSAULT,AGGRAVATED: HANDGUN,SIDEWALK,False,False,...,18.0,70.0,04A,1150185.0,1851851.0,2018,01/10/2019 03:16:50 PM,41.749379,-87.725239,"(41.74937879, -87.725239484)"
2,11552585,JB574353,12/31/2018 10:50:00 PM,008XX W 59TH ST,0557,ASSAULT,AGG PRO.EMP:KNIFE/CUTTING INST,GAS STATION,False,False,...,16.0,68.0,04A,1171833.0,1865730.0,2018,01/10/2019 03:16:50 PM,41.787017,-87.645506,"(41.78701723, -87.645505595)"
3,11552598,JC100015,12/31/2018 10:33:00 PM,060XX S MASON AVE,051B,ASSAULT,AGGRAVATED: OTHER FIREARM,SIDEWALK,True,False,...,13.0,64.0,04A,1137767.0,1863898.0,2018,01/10/2019 03:16:50 PM,41.78267,-87.770456,"(41.782669846, -87.77045603)"
4,11553505,JC101102,12/31/2018 10:07:00 PM,078XX S HOYNE AVE,031A,ROBBERY,ARMED: HANDGUN,STREET,False,False,...,18.0,71.0,03,1163724.0,1852701.0,2018,01/10/2019 03:16:50 PM,41.751438,-87.675603,"(41.751438029, -87.675602996)"


# TRANSFORM CHICAGO CRIME DATA

In [3]:
#Drop crime_df columns: ID, Case Number, Block, IUCR, Domestic, Ward, X Coordinate, Y Coordinate, Year, Updated On, Latitude, Longitude, Location
crime = crime_df.drop(columns=['ID', 'Case Number', 'Block', 'IUCR', 'Domestic', 'Ward', 'X Coordinate',
                         'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude', 'Location',
                         'District', 'Community Area', 'FBI Code', 'Arrest', 'Beat'], axis=1)
crime.head()

Unnamed: 0,Date,Primary Type,Description,Location Description
0,12/31/2018 11:30:00 PM,ROBBERY,ARMED: OTHER DANGEROUS WEAPON,CTA PLATFORM
1,12/31/2018 11:27:00 PM,ASSAULT,AGGRAVATED: HANDGUN,SIDEWALK
2,12/31/2018 10:50:00 PM,ASSAULT,AGG PRO.EMP:KNIFE/CUTTING INST,GAS STATION
3,12/31/2018 10:33:00 PM,ASSAULT,AGGRAVATED: OTHER FIREARM,SIDEWALK
4,12/31/2018 10:07:00 PM,ROBBERY,ARMED: HANDGUN,STREET


# EXTRACT CHICAGO WEATHER DATA (JSON)

In [4]:
#Import Weather JSON file

weather_df = pd.read_json('chicago_weather.json', orient='columns')
weather_df.head()

Unnamed: 0,city_id,clouds,dt,dt_iso,main,rain,snow,weather,wind
0,4887398,{'all': 90},1483228800,2017-01-01 00:00:00 +0000 UTC,"{'temp': 274.06, 'temp_min': 273.15, 'temp_max...",,,"[{'id': 804, 'main': 'Clouds', 'description': ...","{'speed': 4, 'deg': 300}"
1,4887398,{'all': 40},1483232400,2017-01-01 01:00:00 +0000 UTC,"{'temp': 272.94, 'temp_min': 271.15, 'temp_max...",,,"[{'id': 802, 'main': 'Clouds', 'description': ...","{'speed': 4, 'deg': 290}"
2,4887398,{'all': 1},1483236000,2017-01-01 02:00:00 +0000 UTC,"{'temp': 272.14, 'temp_min': 271.15, 'temp_max...",,,"[{'id': 800, 'main': 'Clear', 'description': '...","{'speed': 3, 'deg': 270}"
3,4887398,{'all': 1},1483239600,2017-01-01 03:00:00 +0000 UTC,"{'temp': 270.97, 'temp_min': 270.15, 'temp_max...",,,"[{'id': 800, 'main': 'Clear', 'description': '...","{'speed': 3, 'deg': 250}"
4,4887398,{'all': 1},1483243200,2017-01-01 04:00:00 +0000 UTC,"{'temp': 270.14, 'temp_min': 269.15, 'temp_max...",,,"[{'id': 800, 'main': 'Clear', 'description': '...","{'speed': 1, 'deg': 220}"


# TRANSFORM CHICAGO WEATHER DATA

In [5]:
weather = weather_df.drop(columns=['city_id', 'weather', 'clouds', 'dt'], axis=1)
weather.rename(columns={'dt_iso': 'date', 'main': 'temp'}, inplace=True)

In [6]:
# Extract date, Max Temp, Min Temp, Windspeed

#Strip date
dates=[]
for row in weather['date']:
    dates.append(row[:10])

#Strip max and min temperatures, convert from Kelvin to Fahrenheit
max_temp =[]
min_temp =[]
for row in weather['temp']:
    max_temp.append(9/5 * (row['temp_max']-273)+32)
    min_temp.append(9/5 * (row['temp_min']-273)+32)

#Strip wind category
winds = []
for row in weather['wind']:
    winds.append(row['speed'])

#Add new lists to weather dataframe. Convert Kelvin temperature to Fahrenheit
weather['dates'] = dates
weather['max_temp'] = max_temp
weather['min_temp'] = min_temp
weather['winds'] = winds

#Drop old columns from dataframe
weather_clean = weather.drop(columns=['date', 'temp', 'rain', 'snow', 'wind'], axis=1)
weather_clean.head()

Unnamed: 0,dates,max_temp,min_temp,winds
0,2017-01-01,35.87,32.27,4
1,2017-01-01,34.07,28.67,4
2,2017-01-01,32.27,28.67,3
3,2017-01-01,30.47,26.87,3
4,2017-01-01,28.67,25.07,1
