# Analyzing Crime Data

In [1]:
# Dependencies and Setup
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os

In [2]:
# Load the crime lookup table: each raw "Mapping" label is paired with the
# consolidated "MapCrime" category it rolls up into.
file = "crimemap.csv"
crime = pd.read_csv(filepath_or_buffer=file)
crime.head(n=5)

Unnamed: 0,Mapping,MapCrime
0,Burglary,Theft
1,Violence,Violence
2,Other,Other
3,Auto-Theft,Auto-Theft
4,Verbal,Other


In [3]:
# Load the weather lookup table: each raw "Weather" description is paired with
# the consolidated "MapWeather" bucket it rolls up into.
file = "weathermap.csv"
weather = pd.read_csv(filepath_or_buffer=file)
weather.head(n=5)

Unnamed: 0,Weather,MapWeather
0,Clear,Clear
1,Drizzle,Rain
2,Drizzle and Humid,Rain
3,Flurries,Snow
4,Foggy,Rain


In [4]:
# Load the combined crime/weather extract. Each previewed row carries a City,
# Code, StartDate, StartTime, coordinates, a crime "Mapping" label and a
# "Weather" description.
# NOTE(review): the "Unnamed: 0" column looks like an index that was written out
# by an earlier to_csv call -- confirm it is safe to ignore downstream.
file = "combined_crime_withKey_withweather.csv"
df = pd.read_csv(filepath_or_buffer=file)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,City,Code,StartDate,StartTime,Latitude,Longitude,Mapping,Weather
0,0,atlanta,A720,2014-01-01,1,33.69915,-84.41819,Auto-Theft,Partly Cloudy
1,1,atlanta,A720,2014-01-02,21,33.83697,-84.37948,Auto-Theft,Possible Drizzle
2,2,atlanta,A720,2014-01-04,14,33.80726,-84.47158,Auto-Theft,Mostly Cloudy
3,3,atlanta,A720,2014-01-04,12,33.76117,-84.39066,Auto-Theft,Partly Cloudy
4,4,atlanta,A720,2014-01-05,22,33.75294,-84.48775,Auto-Theft,Overcast


In [5]:
# Parse the text StartDate column into timestamps. The result is stored in a
# separate "sDate" column, so the original string column is left untouched.
df["sDate"] = df["StartDate"].pipe(pd.to_datetime)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,City,Code,StartDate,StartTime,Latitude,Longitude,Mapping,Weather,sDate
0,0,atlanta,A720,2014-01-01,1,33.69915,-84.41819,Auto-Theft,Partly Cloudy,2014-01-01
1,1,atlanta,A720,2014-01-02,21,33.83697,-84.37948,Auto-Theft,Possible Drizzle,2014-01-02
2,2,atlanta,A720,2014-01-04,14,33.80726,-84.47158,Auto-Theft,Mostly Cloudy,2014-01-04
3,3,atlanta,A720,2014-01-04,12,33.76117,-84.39066,Auto-Theft,Partly Cloudy,2014-01-04
4,4,atlanta,A720,2014-01-05,22,33.75294,-84.48775,Auto-Theft,Overcast,2014-01-05


In [6]:
# Derive calendar features (year, month number, weekday name) from the parsed
# "sDate" timestamps using the .dt accessor for all three columns.
df["Year"] = df["sDate"].dt.year
df["Month"] = df["sDate"].dt.month
# Series.dt.weekday_name was deprecated in pandas 0.23 and removed in 1.0;
# day_name() is its replacement and yields the same English names
# ("Monday", "Tuesday", ...).
df["DayofWeek"] = df["sDate"].dt.day_name()
df.head()

Unnamed: 0.1,Unnamed: 0,City,Code,StartDate,StartTime,Latitude,Longitude,Mapping,Weather,sDate,Year,Month,DayofWeek
0,0,atlanta,A720,2014-01-01,1,33.69915,-84.41819,Auto-Theft,Partly Cloudy,2014-01-01,2014,1,Wednesday
1,1,atlanta,A720,2014-01-02,21,33.83697,-84.37948,Auto-Theft,Possible Drizzle,2014-01-02,2014,1,Thursday
2,2,atlanta,A720,2014-01-04,14,33.80726,-84.47158,Auto-Theft,Mostly Cloudy,2014-01-04,2014,1,Saturday
3,3,atlanta,A720,2014-01-04,12,33.76117,-84.39066,Auto-Theft,Partly Cloudy,2014-01-04,2014,1,Saturday
4,4,atlanta,A720,2014-01-05,22,33.75294,-84.48775,Auto-Theft,Overcast,2014-01-05,2014,1,Sunday


In [7]:
# Attach the consolidated crime category (MapCrime) to every incident by joining
# on the shared "Mapping" label. Because this is an inner join, incidents whose
# label is missing from the lookup table are dropped.
df_crime = df.merge(crime, how="inner", on="Mapping")
df_crime.head(n=5)

Unnamed: 0.1,Unnamed: 0,City,Code,StartDate,StartTime,Latitude,Longitude,Mapping,Weather,sDate,Year,Month,DayofWeek,MapCrime
0,0,atlanta,A720,2014-01-01,1,33.69915,-84.41819,Auto-Theft,Partly Cloudy,2014-01-01,2014,1,Wednesday,Auto-Theft
1,1,atlanta,A720,2014-01-02,21,33.83697,-84.37948,Auto-Theft,Possible Drizzle,2014-01-02,2014,1,Thursday,Auto-Theft
2,2,atlanta,A720,2014-01-04,14,33.80726,-84.47158,Auto-Theft,Mostly Cloudy,2014-01-04,2014,1,Saturday,Auto-Theft
3,3,atlanta,A720,2014-01-04,12,33.76117,-84.39066,Auto-Theft,Partly Cloudy,2014-01-04,2014,1,Saturday,Auto-Theft
4,4,atlanta,A720,2014-01-05,22,33.75294,-84.48775,Auto-Theft,Overcast,2014-01-05,2014,1,Sunday,Auto-Theft


In [8]:
# Attach the consolidated weather bucket (MapWeather) by joining on the shared
# "Weather" description. Because this is an inner join, rows whose description
# is missing from the weather lookup are dropped.
df_crime_weather = df_crime.merge(weather, how="inner", on="Weather")
df_crime_weather.head(n=5)

Unnamed: 0.1,Unnamed: 0,City,Code,StartDate,StartTime,Latitude,Longitude,Mapping,Weather,sDate,Year,Month,DayofWeek,MapCrime,MapWeather
0,0,atlanta,A720,2014-01-01,1,33.69915,-84.41819,Auto-Theft,Partly Cloudy,2014-01-01,2014,1,Wednesday,Auto-Theft,Partly Cloudy
1,3,atlanta,A720,2014-01-04,12,33.76117,-84.39066,Auto-Theft,Partly Cloudy,2014-01-04,2014,1,Saturday,Auto-Theft,Partly Cloudy
2,5,atlanta,A720,2014-01-07,16,33.80294,-84.33821,Auto-Theft,Partly Cloudy,2014-01-07,2014,1,Tuesday,Auto-Theft,Partly Cloudy
3,8,atlanta,A720,2014-01-07,22,33.74516,-84.42502,Auto-Theft,Partly Cloudy,2014-01-07,2014,1,Tuesday,Auto-Theft,Partly Cloudy
4,13,atlanta,A720,2014-01-09,11,33.79973,-84.36868,Auto-Theft,Partly Cloudy,2014-01-09,2014,1,Thursday,Auto-Theft,Partly Cloudy


In [9]:
# Summarise the fully merged frame: row count, per-column dtypes and memory
# footprint, as a sanity check after the two lookup joins.
df_crime_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2872092 entries, 0 to 2872091
Data columns (total 15 columns):
Unnamed: 0    int64
City          object
Code          object
StartDate     object
StartTime     int64
Latitude      float64
Longitude     float64
Mapping       object
Weather       object
sDate         datetime64[ns]
Year          int64
Month         int64
DayofWeek     object
MapCrime      object
MapWeather    object
dtypes: datetime64[ns](1), float64(2), int64(4), object(8)
memory usage: 350.6+ MB


In [11]:
# Keep only the columns needed for the summary. "Latitude" is carried along
# purely as a column to count in the aggregation step.
fields1 = [
    "City",
    "Year",
    "Month",
    "DayofWeek",
    "StartTime",
    "MapCrime",
    "MapWeather",
    "Latitude",
]
# .copy() detaches the subset from the large merged frame.
df1 = df_crime_weather.loc[:, fields1].copy()
df1.head(n=5)

Unnamed: 0,City,Year,Month,DayofWeek,StartTime,MapCrime,MapWeather,Latitude
0,atlanta,2014,1,Wednesday,1,Auto-Theft,Partly Cloudy,33.69915
1,atlanta,2014,1,Saturday,12,Auto-Theft,Partly Cloudy,33.76117
2,atlanta,2014,1,Tuesday,16,Auto-Theft,Partly Cloudy,33.80294
3,atlanta,2014,1,Tuesday,22,Auto-Theft,Partly Cloudy,33.74516
4,atlanta,2014,1,Thursday,11,Auto-Theft,Partly Cloudy,33.79973


In [13]:
# Count incidents for every combination of city, year, month, weekday, hour,
# crime category and weather bucket. The grouping keys become the index and the
# single remaining column (Latitude) is renamed to "Count".
# NOTE(review): "count" tallies non-null Latitude values only, so rows with a
# missing Latitude would not be counted -- confirm Latitude is never null.
fields2 = [
    "City",
    "Year",
    "Month",
    "DayofWeek",
    "StartTime",
    "MapCrime",
    "MapWeather",
]
df2 = (
    df1.groupby(fields2)
    .count()
    .rename(columns={"Latitude": "Count"})
)
df2.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Count
City,Year,Month,DayofWeek,StartTime,MapCrime,MapWeather,Unnamed: 7_level_1
atlanta,2014,1,Friday,0,Auto-Theft,Clear,1
atlanta,2014,1,Friday,0,Theft,Clear,9
atlanta,2014,1,Friday,0,Theft,Overcast,1
atlanta,2014,1,Friday,0,Theft,Partly Cloudy,2
atlanta,2014,1,Friday,0,Violence,Overcast,1


In [14]:
# Keep an independent copy of the grouped counts under the name `crime`.
# This rebinds `crime`, which until now held the crime lookup table.
crime = df2.copy(deep=True)
# Persist the same grouped counts as a comma-separated UTF-8 file.
df2.to_csv(path_or_buf="SummaryData.csv", sep=",", encoding="utf-8")

In [15]:
from sqlalchemy import create_engine

In [16]:
# SQLite creates the database file on demand but not its parent folder, so make
# sure the relative "db/" directory exists before the first connection is opened;
# otherwise a fresh checkout fails with "unable to open database file".
os.makedirs("db", exist_ok=True)
# Engine for the SQLite database stored at db/crime.sqlite (relative to the
# notebook's working directory).
engine = create_engine('sqlite:///db/crime.sqlite')

In [20]:
# Write the grouped counts to the "crime" table. The index levels (City, Year,
# Month, DayofWeek, StartTime, MapCrime, MapWeather) are stored as ordinary
# columns next to "Count".
# if_exists="replace" keeps the cell idempotent: the default ("fail") raises
# ValueError on every re-run once the table has been created.
crime.to_sql('crime', engine, if_exists='replace')

In [21]:
# Total incident count per hour of day (StartTime) for a single city.
city_label = "atlanta"
# The city is passed as a bound parameter ("?") instead of being concatenated
# into the statement. The previous version wrapped the value in double quotes,
# which SQLite parses as an identifier before falling back to a string literal,
# and it would break on any value that itself contains a quote.
sql_string = 'SELECT StartTime, sum(Count) FROM crime WHERE City = ? GROUP By StartTime'
test = pd.read_sql_query(sql_string, engine, params=(city_label,))
test.head()

Unnamed: 0,StartTime,sum(Count)
0,0,6986
1,1,4395
2,2,3356
3,3,2875
4,4,2133


In [22]:
# Assemble the same per-hour query from reusable pieces: the grouping column
# (`sample`) and the already-quoted city literal (`city`).
sample = "StartTime"
city = '"atlanta"'
stmt1 = "Select {}, SUM(Count) FROM crime".format(sample)
stmt2 = " WHERE City = {}".format(city)
stmt3 = " GROUP BY {}".format(sample)
stmt = "".join((stmt1, stmt2, stmt3))
# Show the final statement so it can be checked before it is executed.
print(stmt)

Select StartTime, SUM(Count) FROM crime WHERE City = "atlanta" GROUP BY StartTime


In [23]:
# Run the assembled statement against the SQLite engine and preview the result.
test = pd.read_sql_query(sql=stmt, con=engine)
test.head(n=5)

Unnamed: 0,StartTime,SUM(Count)
0,0,6986
1,1,4395
2,2,3356
3,3,2875
4,4,2133


In [24]:
# Reshape the query result into a plain dict of lists: first result column ->
# "xAxis", second result column -> "yAxis".
# This rebinds `sample`, which previously held the grouping column name.
sample = {
    axis_key: test.iloc[:, position].tolist()
    for position, axis_key in enumerate(("xAxis", "yAxis"))
}
print(sample)

{'xAxis': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], 'yAxis': [6986, 4395, 3356, 2875, 2133, 2024, 2414, 3295, 5549, 4683, 5320, 5678, 8291, 6500, 6709, 7450, 7362, 8275, 8945, 8883, 8451, 7615, 7557, 6879]}


In [25]:
# Show the values of the first result column on their own as a Python list.
test[test.columns[0]].tolist()

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23]