In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Load the incident export into a DataFrame; the path is relative to the
# notebook's working directory.
crimes = pd.read_csv(filepath_or_buffer='./Crimes_-_2001_to_Present.csv')

In [3]:
# Show the column labels of the loaded frame.
crimes.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

In [4]:
# Summary statistics (count / unique / top / freq) of the 'Arrest' column.
crimes['Arrest'].describe()

count     7501012
unique          2
top         False
freq      5500488
Name: Arrest, dtype: object

In [5]:
# Preview the first rows of the raw data.
crimes.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10224738,HY411648,09/05/2015 01:30:00 PM,043XX S WOOD ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,12.0,61.0,08B,1165074.0,1875917.0,2015,02/10/2018 03:50:01 PM,41.815117,-87.67,"(41.815117282, -87.669999562)"
1,10224739,HY411615,09/04/2015 11:30:00 AM,008XX N CENTRAL AVE,870,THEFT,POCKET-PICKING,CTA BUS,False,False,...,29.0,25.0,06,1138875.0,1904869.0,2015,02/10/2018 03:50:01 PM,41.89508,-87.7654,"(41.895080471, -87.765400451)"
2,11646166,JC213529,09/01/2018 12:01:00 AM,082XX S INGLESIDE AVE,810,THEFT,OVER $500,RESIDENCE,False,True,...,8.0,44.0,06,,,2018,04/06/2019 04:04:43 PM,,,
3,10224740,HY411595,09/05/2015 12:45:00 PM,035XX W BARRY AVE,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),SIDEWALK,True,False,...,35.0,21.0,18,1152037.0,1920384.0,2015,02/10/2018 03:50:01 PM,41.937406,-87.71665,"(41.937405765, -87.716649687)"
4,10224741,HY411610,09/05/2015 01:00:00 PM,0000X N LARAMIE AVE,560,ASSAULT,SIMPLE,APARTMENT,False,True,...,28.0,25.0,08A,1141706.0,1900086.0,2015,02/10/2018 03:50:01 PM,41.881903,-87.755121,"(41.881903443, -87.755121152)"


In [7]:
def get_weekday(date):
    """Return the day of the week for a timestamp string.

    Parameters
    ----------
    date : str
        Timestamp formatted like ``'09/05/2015 01:30:00 PM'``
        (``MM/DD/YYYY hh:mm:ss AM|PM``, 12-hour clock).

    Returns
    -------
    int
        Day of the week as returned by ``datetime.weekday()``:
        Monday is 0 and Sunday is 6.

    Raises
    ------
    ValueError
        If ``date`` does not match the expected format.
    """
    # Parse the whole string, including the AM/PM marker, with the 12-hour
    # directive (%I) instead of chopping the marker off and reading the hour
    # as if it were on a 24-hour clock.
    return datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p').weekday()

In [8]:
# Day of week for every incident (Monday=0 ... Sunday=6).
# The timestamps are parsed in a single vectorized pass with an explicit
# format; a row-wise DataFrame.apply(axis=1) would build a Series object and
# call strptime in Python for each of the millions of rows.
crimes['Weekday'] = pd.to_datetime(
    crimes['Date'], format='%m/%d/%Y %I:%M:%S %p'
).dt.weekday

In [9]:
def get_date_only(date):
    """Split a timestamp string into its calendar components.

    Parameters
    ----------
    date : str
        Timestamp formatted like ``'09/05/2015 01:30:00 PM'``
        (``MM/DD/YYYY hh:mm:ss AM|PM``, 12-hour clock).

    Returns
    -------
    tuple
        ``(year, month, day, date_string)`` where the first three items are
        ints and ``date_string`` is the date rendered as ``'MM/DD/YYYY'``.

    Raises
    ------
    ValueError
        If ``date`` does not match the expected format.
    """
    # Parse the full string with the 12-hour directives (%I / %p) rather than
    # slicing off the AM/PM marker and treating the hour as 24-hour time.
    parsed = datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p')
    return parsed.year, parsed.month, parsed.day, parsed.strftime('%m/%d/%Y')

In [10]:
# Derive calendar columns from the 'Date' timestamp.
# The column is parsed once and the parts are read through vectorized .dt
# accessors, instead of materialising one Python tuple per row and
# transposing them with zip(*...).
# NOTE: 'Year' already exists in the CSV; it is overwritten here with the
# year taken from 'Date'.
_parsed_dates = pd.to_datetime(crimes['Date'], format='%m/%d/%Y %I:%M:%S %p')
crimes['Year'] = _parsed_dates.dt.year
crimes['Month'] = _parsed_dates.dt.month
crimes['Day'] = _parsed_dates.dt.day
# Date without the time-of-day part, as an 'MM/DD/YYYY' string.
crimes['Date_only'] = _parsed_dates.dt.strftime('%m/%d/%Y')

In [11]:
# Restrict the analysis to incidents whose 'Year' is after 2019.
is_recent = crimes['Year'].gt(2019)
recent_crimes = crimes.loc[is_recent]

In [12]:
# Distinct 'Primary Type' labels, in order of first appearance.
# These come from the full `crimes` frame, not from `recent_crimes`.
crime_type = pd.unique(crimes['Primary Type'])

In [13]:
# crime_type holds the distinct 'Primary Type' values, so the number printed
# here is the count of crime categories, followed by the categories themselves.
n_crime_types = len(crime_type)
print('total num of crimes: %d' % n_crime_types)
print(crime_type)

total num of crimes: 36
['BATTERY' 'THEFT' 'NARCOTICS' 'ASSAULT' 'BURGLARY' 'ROBBERY'
 'DECEPTIVE PRACTICE' 'OTHER OFFENSE' 'CRIMINAL DAMAGE'
 'WEAPONS VIOLATION' 'CRIMINAL TRESPASS' 'MOTOR VEHICLE THEFT'
 'SEX OFFENSE' 'INTERFERENCE WITH PUBLIC OFFICER'
 'OFFENSE INVOLVING CHILDREN' 'PUBLIC PEACE VIOLATION' 'PROSTITUTION'
 'GAMBLING' 'CRIM SEXUAL ASSAULT' 'LIQUOR LAW VIOLATION'
 'CRIMINAL SEXUAL ASSAULT' 'ARSON' 'STALKING' 'KIDNAPPING' 'INTIMIDATION'
 'HOMICIDE' 'CONCEALED CARRY LICENSE VIOLATION' 'NON - CRIMINAL'
 'HUMAN TRAFFICKING' 'OBSCENITY' 'PUBLIC INDECENCY'
 'OTHER NARCOTIC VIOLATION' 'NON-CRIMINAL'
 'NON-CRIMINAL (SUBJECT SPECIFIED)' 'RITUALISM' 'DOMESTIC VIOLENCE']


In [14]:
# Map each crime type to the recent incidents of that type.
# A type with no recent incidents maps to an empty frame.
crime_type_data = {
    crime: recent_crimes.loc[recent_crimes['Primary Type'].eq(crime)]
    for crime in crime_type
}

In [15]:
# Report how many recent incidents fall under each crime type.
for crime in crime_type:
    n_incidents = crime_type_data[crime].shape[0]
    print(crime, ':', n_incidents)

BATTERY : 87846
THEFT : 88823
NARCOTICS : 12564
ASSAULT : 41439
BURGLARY : 16556
ROBBERY : 17160
DECEPTIVE PRACTICE : 36362
OTHER OFFENSE : 28572
CRIMINAL DAMAGE : 53390
WEAPONS VIOLATION : 18587
CRIMINAL TRESPASS : 8171
MOTOR VEHICLE THEFT : 22711
SEX OFFENSE : 2152
INTERFERENCE WITH PUBLIC OFFICER : 1033
OFFENSE INVOLVING CHILDREN : 4100
PUBLIC PEACE VIOLATION : 1948
PROSTITUTION : 434
GAMBLING : 38
CRIM SEXUAL ASSAULT : 75
LIQUOR LAW VIOLATION : 339
CRIMINAL SEXUAL ASSAULT : 2840
ARSON : 1162
STALKING : 610
KIDNAPPING : 215
INTIMIDATION : 305
HOMICIDE : 1697
CONCEALED CARRY LICENSE VIOLATION : 349
NON - CRIMINAL : 0
HUMAN TRAFFICKING : 24
OBSCENITY : 107
PUBLIC INDECENCY : 13
OTHER NARCOTIC VIOLATION : 9
NON-CRIMINAL : 5
NON-CRIMINAL (SUBJECT SPECIFIED) : 0
RITUALISM : 1
DOMESTIC VIOLENCE : 0


In [16]:
# Recent BATTERY incidents whose 'Weekday' value is 0.
battery = crime_type_data['BATTERY']
battery[battery['Weekday'] == 0]

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,Weekday,Month,Day,Date_only
64813,12239084,JD451963,12/07/2020 11:03:00 AM,006XX S SACRAMENTO BLVD,041A,BATTERY,AGGRAVATED - HANDGUN,STREET,False,False,...,1897131.0,2020,12/18/2020 03:48:35 PM,41.873509,-87.701006,"(41.873508972, -87.701005933)",0,12,7,12/07/2020
77190,12010535,JD187404,03/16/2020 12:05:00 PM,089XX S WALLACE ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,True,True,...,1845572.0,2020,03/25/2020 03:45:43 PM,41.731656,-87.638630,"(41.731656247, -87.638630068)",0,3,16,03/16/2020
141410,12014052,JD187128,03/16/2020 05:15:00 AM,047XX N HERMITAGE AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,1931552.0,2020,03/25/2020 03:45:43 PM,41.967808,-87.672573,"(41.96780792, -87.672573016)",0,3,16,03/16/2020
145069,12014891,JD187819,03/16/2020 05:20:00 PM,093XX S EAST END AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,STREET,False,True,...,1843394.0,2020,03/25/2020 03:45:43 PM,41.725327,-87.582617,"(41.725326836, -87.582617441)",0,3,16,03/16/2020
156554,12013863,JD188458,03/16/2020 11:30:00 PM,048XX W ADAMS ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,1898896.0,2020,03/25/2020 03:45:43 PM,41.878590,-87.745762,"(41.87859028, -87.745761689)",0,3,16,03/16/2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7499059,12617479,JF143703,02/14/2022 04:45:00 AM,048XX W ADAMS ST,0497,BATTERY,AGGRAVATED DOMESTIC BATTERY - OTHER DANGEROUS ...,APARTMENT,True,True,...,1898894.0,2022,03/09/2022 03:46:12 PM,41.878586,-87.746055,"(41.878586294, -87.746055487)",0,2,14,02/14/2022
7499062,12617982,JF144372,02/14/2022 09:00:00 AM,056XX N LINCOLN AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,1937177.0,2022,03/10/2022 03:47:48 PM,41.983374,-87.695525,"(41.983373861, -87.695524775)",0,2,14,02/14/2022
7500319,12600616,JF122712,01/24/2022 01:37:00 PM,043XX N CLARENDON AVE,0495,BATTERY,AGGRAVATED OF A SENIOR CITIZEN,APARTMENT,False,False,...,1929242.0,2022,03/11/2022 03:46:45 PM,41.961336,-87.649899,"(41.961336068, -87.64989918)",0,1,24,01/24/2022
7500670,12637246,JF159756,02/28/2022 10:30:00 AM,010XX N NOBLE ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,SCHOOL - PUBLIC BUILDING,False,True,...,1906900.0,2022,03/11/2022 03:46:45 PM,41.900099,-87.662516,"(41.900099097, -87.662516286)",0,2,28,02/28/2022


In [17]:
# crimes_per_day[crime][weekday] -> incidents of that crime type whose
# 'Weekday' equals weekday (0 through 6).
crimes_per_day = {}

for crime in crime_type:
    incidents = crime_type_data[crime]
    crimes_per_day[crime] = {
        weekday: incidents[incidents['Weekday'] == weekday]
        for weekday in range(7)
    }

In [18]:
# Display the nested {crime type: {weekday: DataFrame}} mapping.
# NOTE(review): this renders every per-type / per-weekday frame, which makes
# the cell output very long; a summary of the sizes may be easier to read.
crimes_per_day

{'BATTERY': {0:                ID Case Number                    Date  \
  64813    12239084    JD451963  12/07/2020 11:03:00 AM   
  77190    12010535    JD187404  03/16/2020 12:05:00 PM   
  141410   12014052    JD187128  03/16/2020 05:15:00 AM   
  145069   12014891    JD187819  03/16/2020 05:20:00 PM   
  156554   12013863    JD188458  03/16/2020 11:30:00 PM   
  ...           ...         ...                     ...   
  7499059  12617479    JF143703  02/14/2022 04:45:00 AM   
  7499062  12617982    JF144372  02/14/2022 09:00:00 AM   
  7500319  12600616    JF122712  01/24/2022 01:37:00 PM   
  7500670  12637246    JF159756  02/28/2022 10:30:00 AM   
  7500735  12639196    JF170199  02/14/2022 04:30:00 PM   
  
                             Block  IUCR Primary Type  \
  64813    006XX S SACRAMENTO BLVD  041A      BATTERY   
  77190         089XX S WALLACE ST  0486      BATTERY   
  141410     047XX N HERMITAGE AVE  0486      BATTERY   
  145069      093XX S EAST END AVE  0486      B

In [29]:
n_days = 7
# Distinct calendar dates in the recent window (not used below in this cell).
n_dates = len(recent_crimes['Date_only'].unique())
crime_type = recent_crimes['Primary Type'].unique()

# Number of distinct calendar dates per weekday, counted over ALL recent
# incidents. This is the denominator for the per-weekday statistics: a date
# on which a given crime type did not occur still counts as a day with zero
# incidents of that type.
# NOTE(review): a date with no incident of any type at all would still be
# missed here - assumed not to happen in this dataset; confirm.
dates_per_weekday = recent_crimes.groupby('Weekday')['Date_only'].nunique()

# daily_crimes_per_weekday[crime][weekday] -> [mean, population variance] of
# the number of incidents per calendar date falling on that weekday.
daily_crimes_per_weekday = dict()

for crime in crime_type:
    temp = dict()
    for day in range(n_days):
        # Incidents per date, for the dates with at least one incident.
        cnt = crimes_per_day[crime][day]['Date_only'].value_counts()
        day_cnt = int(dates_per_weekday.get(day, 0))
        if day_cnt == 0:
            # The window contains no date for this weekday: nothing to average.
            temp[day] = [0.0, 0.0]
            continue
        avg = cnt.sum() / day_cnt
        # Dates without any incident of this type each contribute
        # (0 - avg)^2 to the sum of squared deviations.
        quiet_dates = day_cnt - len(cnt)
        # Built-in sum keeps the left-to-right float accumulation, so values
        # for crime types that occur on every date do not change.
        var = (sum(pow(cnt - avg, 2)) + quiet_dates * avg ** 2) / day_cnt
        temp[day] = [avg, var]
    daily_crimes_per_weekday[crime] = temp

In [30]:
# Display {crime type: {weekday: [avg, var]}} as computed in the previous cell.
daily_crimes_per_weekday

{'THEFT': {0: [111.12389380530973, 560.9934998825281],
  1: [113.45132743362832, 560.0175424857074],
  2: [113.97368421052632, 544.5343951985225],
  3: [113.08771929824562, 575.0624807633119],
  4: [120.54385964912281, 745.5112342259158],
  5: [111.89380530973452, 706.7320855196178],
  6: [98.89380530973452, 510.6789881744853]},
 'MOTOR VEHICLE THEFT': {0: [28.486725663716815, 57.382567154828045],
  1: [27.911504424778762, 54.77092959511319],
  2: [27.37719298245614, 52.32263773468763],
  3: [28.17543859649123, 54.03939673745767],
  4: [29.763157894736842, 53.49653739612188],
  5: [29.654867256637168, 74.89858250450314],
  6: [28.858407079646017, 97.67906648915344]},
 'ASSAULT': {0: [51.690265486725664, 151.2580468321718],
  1: [52.469026548672566, 136.78001409664026],
  2: [53.96491228070175, 130.94613727300703],
  3: [52.10526315789474, 131.14681440443212],
  4: [51.771929824561404, 123.87780855647894],
  5: [51.823008849557525, 105.01292192027563],
  6: [51.49557522123894, 140.78095