# Crime Sampler Using Dataset From Chicago Police

# IMPORT LIBRARY

Import necessary library

In [1]:
# Import the csv module
import csv

# Import the pendulum module for datetime manipulation
import pendulum

# Import Counter, defaultdict from collections module
from collections import Counter
from collections import defaultdict

# Import the datetime module
from datetime import datetime

Open Crime Sampler dataset using csv import

In [21]:
# Create the file object: csvfile
csv_file = open('Crimes2018.csv','r')

# Create an empty list: crime_data
crime_data = []

Read crime_sampler.csv line by line, then convert it to crime_data list

In [3]:
# Loop over a csv reader on the file object
for row in csv.reader(csv_file):

    # Append the date, type of crime, location description, and arrest
    crime_data.append((row[2], row[5], row[7], row[8]))

In [4]:
crime_data[0:3]

[('Date', 'Primary Type', 'Location Description', 'Arrest'),
 ('01/10/2018 03:00:00 PM', 'THEFT', 'RESIDENCE', 'false'),
 ('04/01/2018 08:00:00 AM', 'THEFT', 'APARTMENT', 'false')]

In [5]:
# Remove the first element from crime_data
crime_data.pop(0)

('Date', 'Primary Type', 'Location Description', 'Arrest')

Print crime_data for 10 first list

In [6]:
# Print the first 10 records
print(crime_data[:10])

[('01/10/2018 03:00:00 PM', 'THEFT', 'RESIDENCE', 'false'), ('04/01/2018 08:00:00 AM', 'THEFT', 'APARTMENT', 'false'), ('07/18/2018 08:00:00 AM', 'DECEPTIVE PRACTICE', 'RESIDENCE', 'false'), ('12/01/2018 07:00:00 AM', 'DECEPTIVE PRACTICE', 'RESIDENCE', 'false'), ('03/20/2018 12:01:00 AM', 'SEX OFFENSE', 'RESIDENCE', 'false'), ('12/09/2018 05:36:00 PM', 'CRIMINAL SEXUAL ASSAULT', 'APARTMENT', 'false'), ('09/18/2018 12:10:00 PM', 'BATTERY', 'SIDEWALK', 'false'), ('06/01/2018 12:01:00 AM', 'OFFENSE INVOLVING CHILDREN', 'APARTMENT', 'false'), ('04/29/2018 11:45:00 PM', 'OFFENSE INVOLVING CHILDREN', 'RESIDENCE', 'true'), ('07/01/2018 12:00:00 AM', 'OFFENSE INVOLVING CHILDREN', 'APARTMENT', 'false')]


# CRIME BY MONTH

Find the Months with the Highest Number of Crimes

In [7]:
# Create a Counter Object: crimes_by_month
crimes_by_month = Counter()

In [8]:
# Loop over the crime_data list
for date in crime_data:

    # Convert the first element of each item into a Python Datetime Object: date
    date = datetime.strptime(date[0], '%m/%d/%Y %I:%M:%S %p')

    # Increment the counter for the month of the row by one
    crimes_by_month[date.month] += 1

Print top 3 month for crime

In [9]:
# Print the 3 most common months for crime
print(crimes_by_month.most_common(3))

[(8, 25377), (7, 25232), (5, 24695)]


We can see that Month of August was the highest crime month followed by July and May.

Now time to flip our crime_data list into a dictionary
keyed by month with a list of location values for each
month, and filter down to the records for the year 2018

In [10]:
# Create a dictionary that defaults to a list: locations_by_month
locations_by_month = defaultdict(list)

In [11]:
# Loop over the crime_data list
for row in crime_data:
    # Convert the first element to a date object
    date = datetime.strptime(row[0], '%m/%d/%Y %I:%M:%S %p')

    # If the year is 2018
    if date.year == 2018:
        # Set the dictionary key to the month and append the location (2nd element) to the values list
        locations_by_month[date.month].append(row[2])

In [12]:
# Print the dictionary for Month January
print(locations_by_month[1])

['RESIDENCE', 'BARBERSHOP', '', 'APARTMENT', 'RESIDENCE', 'APARTMENT', 'APARTMENT', 'APARTMENT', 'RESIDENCE', 'STREET', 'STREET', 'APARTMENT', 'APARTMENT', 'APARTMENT', 'STREET', 'OTHER (SPECIFY)', 'APARTMENT', 'RESIDENCE - GARAGE', 'OTHER (SPECIFY)', 'OTHER (SPECIFY)', 'RESIDENCE', 'RESIDENCE', 'PARK PROPERTY', 'SCHOOL - PUBLIC BUILDING', 'RESIDENCE', 'STREET', 'ALLEY', '', 'SIDEWALK', 'APARTMENT', 'VACANT LOT / LAND', 'RESIDENCE', 'RESIDENCE', 'APARTMENT', 'RESIDENCE', 'RESIDENCE', 'RESIDENCE', 'SIDEWALK', 'SIDEWALK', 'STREET', 'CURRENCY EXCHANGE', 'CURRENCY EXCHANGE', 'RESIDENCE', 'OTHER', 'RESIDENCE', 'OTHER', 'RESIDENCE', 'CONVENIENCE STORE', 'APARTMENT', 'RESIDENCE', 'WAREHOUSE', 'RESIDENCE', 'RESIDENCE', 'APARTMENT', 'APARTMENT', 'SMALL RETAIL STORE', 'POLICE FACILITY/VEH PARKING LOT', 'OTHER', 'VEHICLE NON-COMMERCIAL', 'RESIDENCE', 'SMALL RETAIL STORE', 'RESIDENCE', 'RESIDENCE', 'RESIDENCE', 'RESIDENCE', 'RESIDENCE - GARAGE', 'RESIDENCE', 'RESIDENCE', 'RESIDENCE', 'FEDERAL BUIL

Find the Most Common Crimes by Location Type by Month in 2018

In [13]:
# Loop over the items from locations_by_month using tuple expansion of the month and locations
for month, locations in locations_by_month.items():
    # Make a Counter of the locations
    location_count = Counter(locations)
    # Print the month
    print(month)
    # Print the most common location
    print(location_count.most_common(5))

1
[('STREET', 4442), ('RESIDENCE', 3668), ('APARTMENT', 2831), ('SIDEWALK', 1281), ('OTHER', 835)]
4
[('STREET', 4482), ('RESIDENCE', 3549), ('APARTMENT', 2805), ('SIDEWALK', 1542), ('OTHER', 867)]
7
[('STREET', 5701), ('RESIDENCE', 4121), ('APARTMENT', 3007), ('SIDEWALK', 2354), ('OTHER', 1019)]
12
[('STREET', 4904), ('RESIDENCE', 3705), ('APARTMENT', 3056), ('SIDEWALK', 1445), ('OTHER', 912)]
3
[('STREET', 4476), ('RESIDENCE', 3814), ('APARTMENT', 2811), ('SIDEWALK', 1461), ('OTHER', 868)]
9
[('STREET', 5307), ('RESIDENCE', 3543), ('APARTMENT', 2840), ('SIDEWALK', 2112), ('OTHER', 914)]
6
[('STREET', 5262), ('RESIDENCE', 4003), ('APARTMENT', 2982), ('SIDEWALK', 2134), ('OTHER', 1010)]
5
[('STREET', 5444), ('RESIDENCE', 4012), ('APARTMENT', 3161), ('SIDEWALK', 2045), ('OTHER', 977)]
10
[('STREET', 5107), ('RESIDENCE', 3697), ('APARTMENT', 2904), ('SIDEWALK', 1867), ('OTHER', 907)]
11
[('STREET', 4562), ('RESIDENCE', 3607), ('APARTMENT', 2889), ('SIDEWALK', 1458), ('OTHER', 820)]
8
[('

We can see that most common of crime occured was on the Street

# CRIME BY DISTRIC

Now we want to know crime by district

In [14]:
# Create the file object: csvfile
csv_file = open('Crimes2018.csv','r')

# Create a dictionary that defaults to a list: crimes_by_district
crimes_by_district = defaultdict(list)

Read crime_sampler.csv line by line, then convert it to crime_by_district list

In [23]:
# Loop over a DictReader of the CSV file
for row in csv.DictReader(csv_file):
    # Pop the district from each row: district
    district = row.pop('District')
    # Append the rest of the data to the list for proper district in crimes_by_district
    crimes_by_district[district].append(row)

In [25]:
for district, crimes in crimes_by_district.items():
    # Print the district
    print(district)

    # Create an empty Counter object: year_count
    year_count = Counter()

    # Loop over the crimes:
    for crime in crimes:
        # If there was an arrest
        if crime['Arrest'] == 'true':
            # Convert the Date to a datetime and get the year
            year = datetime.strptime(crime['Date'], '%m/%d/%Y %I:%M:%S %p').year
            # Increment the Counter for the year
            year_count[year] += 1

    # Print the counter
    print(year_count)

010
Counter({2018: 4066})
019
Counter({2018: 1617})
009
Counter({2018: 2146})
006
Counter({2018: 4113})
008
Counter({2018: 2579})
025
Counter({2018: 2515})
011
Counter({2018: 7234})
004
Counter({2018: 3127})
002
Counter({2018: 1742})
003
Counter({2018: 2341})
012
Counter({2018: 1820})
018
Counter({2018: 2349})
017
Counter({2018: 829})
007
Counter({2018: 3612})
001
Counter({2018: 2363})
016
Counter({2018: 1189})
022
Counter({2018: 1487})
005
Counter({2018: 3052})
015
Counter({2018: 2494})
024
Counter({2018: 1092})
031
Counter({2018: 1})
014
Counter({2018: 1129})
020
Counter({2018: 736})


Can be seen that district '011' most crime this 2018 year
