### Import libraries

In [1]:
import os
import pandas as pd
import copy

### List of directories with corresponding paths:

In [2]:
# Project root is the parent of the directory the notebook runs from.
main_dir = os.path.abspath("..")
data_dir = os.path.join(main_dir, "Data")
# data_dir is already an absolute path, so joining from it gives the same
# result as prefixing main_dir again.
shapefiles_dir = os.path.join(data_dir, "shapefiles")

# Read in merged dataset that includes Crime and COVID case data

In [3]:
# Load the two merged crime + COVID-case extracts from <data_dir>/Merged.
merged_dir = os.path.join(data_dir, "Merged")
df_pre_covid_crime_and_covid_cases = pd.read_csv(os.path.join(merged_dir, "pre_covid_crime_and_covid_cases.csv"))
df_covid_crime_and_cases = pd.read_csv(os.path.join(merged_dir, "covid_crime_and_covid_cases.csv"))

# Pre-COVID frame: parse the timestamp columns, discard the extra
# "START_DATE.1" column, then keep only rows without missing values.
for timestamp_col in ("REPORT_DAT", "START_DATE", "END_DATE"):
    df_pre_covid_crime_and_covid_cases[timestamp_col] = pd.to_datetime(
        df_pre_covid_crime_and_covid_cases[timestamp_col]
    )
df_pre_covid_crime_and_covid_cases = (
    df_pre_covid_crime_and_covid_cases
    .drop(columns=["START_DATE.1"])
    .dropna()
)

# COVID-period frame: same parsing plus the `date` column; the first column is
# removed by position, then rows with missing values are dropped.
for timestamp_col in ("REPORT_DAT", "START_DATE", "END_DATE", "date"):
    df_covid_crime_and_cases[timestamp_col] = pd.to_datetime(
        df_covid_crime_and_cases[timestamp_col]
    )
df_covid_crime_and_cases = (
    df_covid_crime_and_cases
    .drop(columns=[df_covid_crime_and_cases.columns[0]])
    .dropna()
)

In [4]:
# Report the (rows, columns) shape of each cleaned frame.
for frame_label, frame in (
    ("Pre-COVID Crime with COVID Case data:", df_pre_covid_crime_and_covid_cases),
    ("COVID Crime with COVID Case data:", df_covid_crime_and_cases),
):
    print(frame_label, frame.shape)

Pre-COVID Crime with COVID Case data: (18002, 17)
COVID Crime with COVID Case data: (35738, 17)


In [5]:
df_pre_covid_crime_and_covid_cases.head(3)

Unnamed: 0,START_DATE,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,WARD,DISTRICT,LATITUDE,LONGITUDE,END_DATE,geometry,date,covid_daily_case_count,covid_deaths
0,2017-01-01 00:28:57+00:00,2017-01-02 01:16:13+00:00,EVENING,OTHERS,THEFT/OTHER,1900 - 2199 BLOCK OF RIDGECREST COURT SE,402201.041504,131112.794647,8.0,7.0,38.847816,-76.974645,2017-01-01 00:45:35+00:00,POINT (-76.97464451720001 38.8478160318),2017-01-01,0,0
1,2017-01-01 01:00:51+00:00,2017-01-02 17:20:02+00:00,DAY,OTHERS,THEFT F/AUTO,1700 - 1799 BLOCK OF P STREET NW,396523.77,137976.79,2.0,2.0,38.909645,-77.04008,2017-01-02 03:00:58+00:00,POINT (-77.0400800705 38.9096454698),2017-01-01,0,0
2,2017-01-01 01:00:56+00:00,2017-01-02 02:51:06+00:00,EVENING,OTHERS,THEFT F/AUTO,1821 - 1899 BLOCK OF 16TH STREET NW,396834.07,138593.52,2.0,3.0,38.915202,-77.036505,2017-01-01 20:30:39+00:00,POINT (-77.0365052323 38.9152023543),2017-01-01,0,0


In [6]:
df_covid_crime_and_cases.head(3)

Unnamed: 0,START_DATE,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,WARD,DISTRICT,LATITUDE,LONGITUDE,END_DATE,geometry,date,covid_daily_case_count,covid_deaths
0,2020-03-07 00:00:18+00:00,2020-03-07 23:44:58+00:00,EVENING,OTHERS,THEFT/OTHER,1400 - 1499 BLOCK OF GIRARD STREET NW,397063.06,139725.8,1.0,3.0,38.925403,-77.03387,2020-03-07 14:00:22+00:00,POINT (-77.0338696729 38.925403095),2020-03-07,1,0
1,2020-03-07 00:07:26+00:00,2020-03-07 03:18:07+00:00,EVENING,OTHERS,THEFT F/AUTO,4300 - 4399 BLOCK OF VARNUM PLACE NE,400322.05,141492.13,5.0,4.0,38.94132,-76.996285,2020-03-07 00:47:04+00:00,POINT (-76.9962851937 38.94131958),2020-03-07,1,0
2,2020-03-07 00:17:13+00:00,2020-03-07 01:23:10+00:00,EVENING,OTHERS,THEFT/OTHER,3100 - 3299 BLOCK OF 14TH STREET NW,397162.06,140182.43,1.0,3.0,38.929517,-77.03273,2020-03-07 00:45:03+00:00,POINT (-77.03272986330001 38.9295168861),2020-03-07,1,0


In [7]:
# Now, let us sort the data appropriately:

In [8]:
df_covid_crime_and_cases.dtypes

START_DATE                datetime64[ns, UTC]
REPORT_DAT                datetime64[ns, UTC]
SHIFT                                  object
METHOD                                 object
OFFENSE                                object
BLOCK                                  object
XBLOCK                                float64
YBLOCK                                float64
WARD                                  float64
DISTRICT                              float64
LATITUDE                              float64
LONGITUDE                             float64
END_DATE                  datetime64[ns, UTC]
geometry                               object
date                           datetime64[ns]
covid_daily_case_count                  int64
covid_deaths                            int64
dtype: object

In [9]:
df_covid_crime_and_cases.columns

Index(['START_DATE', 'REPORT_DAT', 'SHIFT', 'METHOD', 'OFFENSE', 'BLOCK',
       'XBLOCK', 'YBLOCK', 'WARD', 'DISTRICT', 'LATITUDE', 'LONGITUDE',
       'END_DATE', 'geometry', 'date', 'covid_daily_case_count',
       'covid_deaths'],
      dtype='object')

In [10]:
# Overall group-by logic to get the appropriate structure:
# group by date to get total crimes
# group by OFFENSE (you'll have 9 lines, one per offense type)
# group by WARD (you'll have another 8 lines)
# group by SHIFT (3 more lines)
# then merge all of them by date

### Get the Crime Rate Column:

In [11]:
# Number of rows recorded per calendar day. The size of each `date` group is
# exactly what counting a constant helper column per group would give, without
# needing a deep copy or a dummy column.
df_crime_rate = (
    df_covid_crime_and_cases
    .groupby("date")
    .size()
    .reset_index(name="daily_crime_count")
)
df_crime_rate

Unnamed: 0,date,daily_crime_count
0,2020-03-07,65
1,2020-03-08,56
2,2020-03-09,82
3,2020-03-10,71
4,2020-03-11,70
...,...,...
530,2021-10-17,71
531,2021-10-18,83
532,2021-10-19,75
533,2021-10-20,48


### Get the Crime Type Column:

In [12]:
set(df_covid_crime_and_cases['OFFENSE'].values)

{'ARSON',
 'ASSAULT W/DANGEROUS WEAPON',
 'BURGLARY',
 'HOMICIDE',
 'MOTOR VEHICLE THEFT',
 'ROBBERY',
 'SEX ABUSE',
 'THEFT F/AUTO',
 'THEFT/OTHER'}

In [13]:
# Number of rows per (date, OFFENSE) pair. Only pairs that actually occur in
# the data get a row; days without a given offense are simply absent.
df_by_crime_type = (
    df_covid_crime_and_cases
    .groupby(["date", "OFFENSE"])
    .size()
    .reset_index(name="total_per_crime_type")
)

df_by_crime_type

Unnamed: 0,date,OFFENSE,total_per_crime_type
0,2020-03-07,ASSAULT W/DANGEROUS WEAPON,3
1,2020-03-07,BURGLARY,2
2,2020-03-07,MOTOR VEHICLE THEFT,6
3,2020-03-07,ROBBERY,3
4,2020-03-07,THEFT F/AUTO,17
...,...,...,...
3461,2021-10-20,THEFT F/AUTO,13
3462,2021-10-20,THEFT/OTHER,20
3463,2021-10-21,ASSAULT W/DANGEROUS WEAPON,1
3464,2021-10-21,MOTOR VEHICLE THEFT,1


In [14]:
def _daily_counts_for_offense(offense_label):
    """Return an independent frame with the daily counts of one OFFENSE value.

    The result holds `date` and `total_per_crime_type`, plus an `index` column
    carrying the row labels from df_by_crime_type (reset_index() is called
    without drop=True).
    """
    is_offense = df_by_crime_type["OFFENSE"] == offense_label
    return df_by_crime_type.loc[is_offense, ["date", "total_per_crime_type"]].reset_index().copy()


# One frame per offense type present in the data.
df_ARSON_rate = _daily_counts_for_offense("ARSON")
df_ASSAULT_rate = _daily_counts_for_offense("ASSAULT W/DANGEROUS WEAPON")
df_BURGLARY_rate = _daily_counts_for_offense("BURGLARY")
df_HOMICIDE_rate = _daily_counts_for_offense("HOMICIDE")
df_MOTOR_THEFT_rate = _daily_counts_for_offense("MOTOR VEHICLE THEFT")
df_ROBBERY_rate = _daily_counts_for_offense("ROBBERY")
df_SEX_ABUSE_rate = _daily_counts_for_offense("SEX ABUSE")
df_THEFT_FROM_CAR_rate = _daily_counts_for_offense("THEFT F/AUTO")
df_THEFT_FROM_OTHER_rate = _daily_counts_for_offense("THEFT/OTHER")

In [15]:
df_MOTOR_THEFT_rate

Unnamed: 0,index,date,total_per_crime_type
0,2,2020-03-07,6
1,8,2020-03-08,7
2,14,2020-03-09,4
3,21,2020-03-10,6
4,26,2020-03-11,4
...,...,...,...
529,3437,2021-10-17,15
530,3444,2021-10-18,14
531,3451,2021-10-19,10
532,3458,2021-10-20,4


### Get the Ward Count Column:

In [16]:
set(df_covid_crime_and_cases['WARD'].values)

{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}

In [17]:
# Number of rows per (date, WARD) pair; group sizes replace the
# dummy-column-and-count construction.
df_by_ward_id = (
    df_covid_crime_and_cases
    .groupby(["date", "WARD"])
    .size()
    .reset_index(name="total_crime_count_per_ward")
)
df_by_ward_id

Unnamed: 0,date,WARD,total_crime_count_per_ward
0,2020-03-07,1.0,12
1,2020-03-07,2.0,13
2,2020-03-07,3.0,6
3,2020-03-07,4.0,7
4,2020-03-07,5.0,10
...,...,...,...
4239,2021-10-20,7.0,8
4240,2021-10-20,8.0,5
4241,2021-10-21,1.0,1
4242,2021-10-21,2.0,1


In [18]:
def _daily_counts_for_ward(ward_id):
    """Return an independent frame with the daily counts of one WARD value.

    The result holds `date` and `total_crime_count_per_ward`, plus an `index`
    column carrying the row labels from df_by_ward_id (reset_index() is called
    without drop=True).
    """
    in_ward = df_by_ward_id["WARD"] == ward_id
    return df_by_ward_id.loc[in_ward, ["date", "total_crime_count_per_ward"]].reset_index().copy()


# WARD is stored as a float column, hence the float literals.
df_WARD_1 = _daily_counts_for_ward(1.0)
df_WARD_2 = _daily_counts_for_ward(2.0)
df_WARD_3 = _daily_counts_for_ward(3.0)
df_WARD_4 = _daily_counts_for_ward(4.0)
df_WARD_5 = _daily_counts_for_ward(5.0)
df_WARD_6 = _daily_counts_for_ward(6.0)
df_WARD_7 = _daily_counts_for_ward(7.0)
df_WARD_8 = _daily_counts_for_ward(8.0)

In [19]:
df_WARD_5

Unnamed: 0,index,date,total_crime_count_per_ward
0,4,2020-03-07,10
1,12,2020-03-08,8
2,20,2020-03-09,12
3,28,2020-03-10,9
4,36,2020-03-11,8
...,...,...,...
529,4206,2021-10-16,14
530,4214,2021-10-17,11
531,4222,2021-10-18,7
532,4230,2021-10-19,19


### Get Shift Count Column:

In [20]:
set(df_covid_crime_and_cases['SHIFT'].values)

{'DAY', 'EVENING', 'MIDNIGHT'}

In [21]:
# Number of rows per (date, SHIFT) pair; group sizes replace the
# dummy-column-and-count construction.
df_by_shift = (
    df_covid_crime_and_cases
    .groupby(["date", "SHIFT"])
    .size()
    .reset_index(name="total_crime_count_per_shift")
)

df_by_shift

Unnamed: 0,date,SHIFT,total_crime_count_per_shift
0,2020-03-07,DAY,28
1,2020-03-07,EVENING,25
2,2020-03-07,MIDNIGHT,12
3,2020-03-08,DAY,17
4,2020-03-08,EVENING,22
...,...,...,...
1599,2021-10-20,DAY,10
1600,2021-10-20,EVENING,29
1601,2021-10-20,MIDNIGHT,9
1602,2021-10-21,EVENING,1


In [22]:
def _daily_counts_for_shift(shift_label):
    """Return an independent frame with the daily counts of one SHIFT value.

    The result holds `date` and `total_crime_count_per_shift`, plus an `index`
    column carrying the row labels from df_by_shift (reset_index() is called
    without drop=True).
    """
    on_shift = df_by_shift["SHIFT"] == shift_label
    return df_by_shift.loc[on_shift, ["date", "total_crime_count_per_shift"]].reset_index().copy()


df_SHIFT_DAY = _daily_counts_for_shift("DAY")
df_SHIFT_MIDNIGHT = _daily_counts_for_shift("MIDNIGHT")
df_SHIFT_EVENING = _daily_counts_for_shift("EVENING")

In [23]:
df_SHIFT_DAY

Unnamed: 0,index,date,total_crime_count_per_shift
0,0,2020-03-07,28
1,3,2020-03-08,17
2,6,2020-03-09,28
3,9,2020-03-10,28
4,12,2020-03-11,27
...,...,...,...
529,1587,2021-10-16,21
530,1590,2021-10-17,26
531,1593,2021-10-18,35
532,1596,2021-10-19,26


In [24]:
# Finally, merge into one:

### First, let's get the base (covid data):

In [25]:
# Load the COVID history file and reduce it to the District of Columbia rows
# with the column names used in the rest of the notebook.
df_covid_cases = pd.read_csv(os.path.join(data_dir, "Merged", "covid_cases_district-of-columbia-history.csv"))
df_covid_cases['date'] = pd.to_datetime(df_covid_cases['date'])

# Build the result as a new frame instead of assigning columns onto a
# boolean-filtered slice, which triggers pandas' SettingWithCopyWarning.
# NOTE(review): the `cases` values look cumulative (they only grow, reaching
# tens of thousands), so `covid_daily_case_count` is presumably a running total
# rather than a per-day count — confirm before modelling with it.
df_covid_cases = (
    df_covid_cases[df_covid_cases["state"] == "District of Columbia"]
    .assign(
        covid_deaths=lambda frame: frame["deaths"],
        covid_daily_case_count=lambda frame: frame["cases"],
    )
    .drop(columns=["state", "deaths", "cases", "fips"])
    .dropna()
)

In [26]:
df_covid_cases

Unnamed: 0,date,covid_deaths,covid_daily_case_count
353,2020-03-07,0,1
387,2020-03-08,0,1
422,2020-03-09,0,4
459,2020-03-10,0,4
500,2020-03-11,0,10
...,...,...,...
33783,2021-11-06,1193,64799
33839,2021-11-07,1193,64799
33895,2021-11-08,1193,65050
33951,2021-11-09,1193,65094


In [27]:
# Base table: one row per day with a crime count, with that day's COVID figures
# attached by a left join on `date`.
covid_columns = ["date", "covid_daily_case_count", "covid_deaths"]
df_main = df_crime_rate.merge(df_covid_cases[covid_columns], how="left", on="date")
df_main

Unnamed: 0,date,daily_crime_count,covid_daily_case_count,covid_deaths
0,2020-03-07,65,1,0
1,2020-03-08,56,1,0
2,2020-03-09,82,4,0
3,2020-03-10,71,4,0
4,2020-03-11,70,10,0
...,...,...,...,...
530,2021-10-17,71,62976,1183
531,2021-10-18,83,63305,1184
532,2021-10-19,75,63345,1185
533,2021-10-20,48,63400,1185


In [28]:
# df_crime_rate

# df_ARSON_rate["CT_ARSON_DR"], daily count stored at main column => "total_per_crime_type"
# df_ASSAULT_rate["CT_ASSAULT_DR"]
# df_BURGLARY_rate["CT_BURGLARY_DR"]
# df_HOMICIDE_rate["CT_HOMICIDE_DR"]
# df_MOTOR_THEFT_rate["CT_MOTOR_THEFT_DR"]
# df_ROBBERY_rate["CT_ROBBERY_DR"]
# df_SEX_ABUSE_rate["CT_SEX_ABUSE_DR"]
# df_THEFT_FROM_CAR_rate["CT_THEFT_FROM_CAR_DR"]
# df_THEFT_FROM_OTHER_rate["CT_THEFT_FROM_OTHER_DR"]

# df_WARD_1["CL_WARD_1_DR"], daily count stored at main column => "total_crime_count_per_ward"
# df_WARD_2["CL_WARD_2_DR"]
# df_WARD_3["CL_WARD_3_DR"]
# df_WARD_4["CL_WARD_4_DR"]
# df_WARD_5["CL_WARD_5_DR"]
# df_WARD_6["CL_WARD_6_DR"]
# df_WARD_7["CL_WARD_7_DR"]
# df_WARD_8["CL_WARD_8_DR"]

# df_SHIFT_DAY["CO_SHIFT_MORNING_DR"], daily count stored at main column => "total_crime_count_per_shift"
# df_SHIFT_EVENING["CO_SHIFT_EVENING_DR"]
# df_SHIFT_MIDNIGHT["CO_SHIFT_MIDNIGHT_DR"]
#####################################################

df_main

Unnamed: 0,date,daily_crime_count,covid_daily_case_count,covid_deaths
0,2020-03-07,65,1,0
1,2020-03-08,56,1,0
2,2020-03-09,82,4,0
3,2020-03-10,71,4,0
4,2020-03-11,70,10,0
...,...,...,...,...
530,2021-10-17,71,62976,1183
531,2021-10-18,83,63305,1184
532,2021-10-19,75,63345,1185
533,2021-10-20,48,63400,1185


In [29]:
# get the Crime Type in:

In [30]:
# df_ARSON_rate["CT_ARSON_DR"], daily count stored at main column => "total_per_crime_type"
# df_ASSAULT_rate["CT_ASSAULT_DR"]
# df_BURGLARY_rate["CT_BURGLARY_DR"]
# df_HOMICIDE_rate["CT_HOMICIDE_DR"]
# df_MOTOR_THEFT_rate["CT_MOTOR_THEFT_DR"]
# df_ROBBERY_rate["CT_ROBBERY_DR"]
# df_SEX_ABUSE_rate["CT_SEX_ABUSE_DR"]
# df_THEFT_FROM_CAR_rate["CT_THEFT_FROM_CAR_DR"]
# df_THEFT_FROM_OTHER_rate["CT_THEFT_FROM_OTHER_DR"]

In [31]:
# Output column in df_main -> per-offense daily-count frame that feeds it.
# Dict order defines the order in which the columns are appended.
crime_type_sources = {
    "CT_ARSON_DR": df_ARSON_rate,
    "CT_ASSAULT_DR": df_ASSAULT_rate,
    "CT_BURGLARY_DR": df_BURGLARY_rate,
    "CT_HOMICIDE_DR": df_HOMICIDE_rate,
    "CT_MOTOR_THEFT_DR": df_MOTOR_THEFT_rate,
    "CT_ROBBERY_DR": df_ROBBERY_rate,
    "CT_SEX_ABUSE_DR": df_SEX_ABUSE_rate,
    "CT_THEFT_FROM_CAR_DR": df_THEFT_FROM_CAR_rate,
    "CT_THEFT_FROM_OTHER_DR": df_THEFT_FROM_OTHER_rate,
}

for column_name, daily_counts in crime_type_sources.items():
    # Discard a column left over from an earlier run of this cell, so that
    # re-running it cannot create duplicate or suffixed columns.
    df_main = df_main.drop(columns=column_name, errors="ignore")
    df_main = pd.merge(
        df_main,
        daily_counts[["date", "total_per_crime_type"]].rename(
            columns={"total_per_crime_type": column_name}
        ),
        how="left",
        on="date",
    )
    # Dates with no matching row come back as NaN from the left join;
    # they are recorded as 0 and the column is stored as integers.
    df_main[column_name] = (
        pd.to_numeric(df_main[column_name], errors="coerce").fillna(0).astype(int)
    )

df_main.head(5)

Unnamed: 0,date,daily_crime_count,covid_daily_case_count,covid_deaths,CT_ARSON_DR,CT_ASSAULT_DR,CT_BURGLARY_DR,CT_HOMICIDE_DR,CT_MOTOR_THEFT_DR,CT_ROBBERY_DR,CT_SEX_ABUSE_DR,CT_THEFT_FROM_CAR_DR,CT_THEFT_FROM_OTHER_DR
0,2020-03-07,65,1,0,0,3,2,0,6,3,0,17,34
1,2020-03-08,56,1,0,0,1,1,0,7,7,0,21,19
2,2020-03-09,82,4,0,0,3,1,0,4,1,0,37,36
3,2020-03-10,71,4,0,0,5,8,1,6,1,0,17,33
4,2020-03-11,70,10,0,0,0,3,0,4,1,0,28,34


In [32]:
# Get the ward data in:

In [33]:
# df_WARD_1["CL_WARD_1_DR"], daily count stored at main column => "total_crime_count_per_ward"
# df_WARD_2["CL_WARD_2_DR"]
# df_WARD_3["CL_WARD_3_DR"]
# df_WARD_4["CL_WARD_4_DR"]
# df_WARD_5["CL_WARD_5_DR"]
# df_WARD_6["CL_WARD_6_DR"]
# df_WARD_7["CL_WARD_7_DR"]
# df_WARD_8["CL_WARD_8_DR"]

In [34]:
# Output column in df_main -> per-ward daily-count frame that feeds it.
# Dict order defines the order in which the columns are appended.
ward_sources = {
    "CL_WARD_1_DR": df_WARD_1,
    "CL_WARD_2_DR": df_WARD_2,
    "CL_WARD_3_DR": df_WARD_3,
    "CL_WARD_4_DR": df_WARD_4,
    "CL_WARD_5_DR": df_WARD_5,
    "CL_WARD_6_DR": df_WARD_6,
    "CL_WARD_7_DR": df_WARD_7,
    "CL_WARD_8_DR": df_WARD_8,
}

for column_name, daily_counts in ward_sources.items():
    # Discard a column left over from an earlier run of this cell, so that
    # re-running it cannot create duplicate or suffixed columns.
    df_main = df_main.drop(columns=column_name, errors="ignore")
    df_main = pd.merge(
        df_main,
        daily_counts[["date", "total_crime_count_per_ward"]].rename(
            columns={"total_crime_count_per_ward": column_name}
        ),
        how="left",
        on="date",
    )
    # Dates with no matching row come back as NaN from the left join;
    # they are recorded as 0 and the column is stored as integers.
    df_main[column_name] = (
        pd.to_numeric(df_main[column_name], errors="coerce").fillna(0).astype(int)
    )

In [35]:
df_main.head(3)

Unnamed: 0,date,daily_crime_count,covid_daily_case_count,covid_deaths,CT_ARSON_DR,CT_ASSAULT_DR,CT_BURGLARY_DR,CT_HOMICIDE_DR,CT_MOTOR_THEFT_DR,CT_ROBBERY_DR,...,CT_THEFT_FROM_CAR_DR,CT_THEFT_FROM_OTHER_DR,CL_WARD_1_DR,CL_WARD_2_DR,CL_WARD_3_DR,CL_WARD_4_DR,CL_WARD_5_DR,CL_WARD_6_DR,CL_WARD_7_DR,CL_WARD_8_DR
0,2020-03-07,65,1,0,0,3,2,0,6,3,...,17,34,12,13,6,7,10,5,7,5
1,2020-03-08,56,1,0,0,1,1,0,7,7,...,21,19,11,9,1,5,8,11,5,6
2,2020-03-09,82,4,0,0,3,1,0,4,1,...,37,36,5,14,7,12,12,14,16,2


In [36]:
# df_SHIFT_DAY["CO_SHIFT_MORNING_DR"], daily count stored at main column => "total_crime_count_per_shift"
# df_SHIFT_EVENING["CO_SHIFT_EVENING_DR"]
# df_SHIFT_MIDNIGHT["CO_SHIFT_MIDNIGHT_DR"]

In [37]:
# Output column in df_main -> per-shift daily-count frame that feeds it.
# The DAY shift is published under the "MORNING" column name.
shift_sources = {
    "CO_SHIFT_MORNING_DR": df_SHIFT_DAY,
    "CO_SHIFT_EVENING_DR": df_SHIFT_EVENING,
    "CO_SHIFT_MIDNIGHT_DR": df_SHIFT_MIDNIGHT,
}

for column_name, daily_counts in shift_sources.items():
    # Discard a column left over from an earlier run of this cell, so that
    # re-running it cannot create duplicate or suffixed columns.
    df_main = df_main.drop(columns=column_name, errors="ignore")
    df_main = pd.merge(
        df_main,
        daily_counts[["date", "total_crime_count_per_shift"]].rename(
            columns={"total_crime_count_per_shift": column_name}
        ),
        how="left",
        on="date",
    )
    # Dates with no matching row come back as NaN from the left join;
    # they are recorded as 0 and the column is stored as integers.
    df_main[column_name] = (
        pd.to_numeric(df_main[column_name], errors="coerce").fillna(0).astype(int)
    )

In [38]:
# Remove any row that still contains a missing value. The per-category count
# columns were already filled with 0 when they were merged in, so a NaN can only
# remain in a column that was not filled (e.g. the COVID columns that came from
# the left merge on `date`).
df_main = df_main.dropna()

In [39]:
df_main.dtypes

date                      datetime64[ns]
daily_crime_count                  int64
covid_daily_case_count             int64
covid_deaths                       int64
CT_ARSON_DR                        int32
CT_ASSAULT_DR                      int32
CT_BURGLARY_DR                     int32
CT_HOMICIDE_DR                     int32
CT_MOTOR_THEFT_DR                  int32
CT_ROBBERY_DR                      int32
CT_SEX_ABUSE_DR                    int32
CT_THEFT_FROM_CAR_DR               int32
CT_THEFT_FROM_OTHER_DR             int32
CL_WARD_1_DR                       int32
CL_WARD_2_DR                       int32
CL_WARD_3_DR                       int32
CL_WARD_4_DR                       int32
CL_WARD_5_DR                       int32
CL_WARD_6_DR                       int32
CL_WARD_7_DR                       int32
CL_WARD_8_DR                       int32
CO_SHIFT_MORNING_DR                int32
CO_SHIFT_EVENING_DR                int32
CO_SHIFT_MIDNIGHT_DR               int32
dtype: object

In [40]:
df_main

Unnamed: 0,date,daily_crime_count,covid_daily_case_count,covid_deaths,CT_ARSON_DR,CT_ASSAULT_DR,CT_BURGLARY_DR,CT_HOMICIDE_DR,CT_MOTOR_THEFT_DR,CT_ROBBERY_DR,...,CL_WARD_2_DR,CL_WARD_3_DR,CL_WARD_4_DR,CL_WARD_5_DR,CL_WARD_6_DR,CL_WARD_7_DR,CL_WARD_8_DR,CO_SHIFT_MORNING_DR,CO_SHIFT_EVENING_DR,CO_SHIFT_MIDNIGHT_DR
0,2020-03-07,65,1,0,0,3,2,0,6,3,...,13,6,7,10,5,7,5,28,25,12
1,2020-03-08,56,1,0,0,1,1,0,7,7,...,9,1,5,8,11,5,6,17,22,17
2,2020-03-09,82,4,0,0,3,1,0,4,1,...,14,7,12,12,14,16,2,28,38,16
3,2020-03-10,71,4,0,0,5,8,1,6,1,...,19,3,5,9,11,8,8,28,26,17
4,2020-03-11,70,10,0,0,0,3,0,4,1,...,15,3,11,8,12,8,5,27,26,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530,2021-10-17,71,62976,1183,0,2,3,1,15,5,...,15,1,8,11,9,7,4,26,24,21
531,2021-10-18,83,63305,1184,0,4,7,0,14,1,...,14,6,11,7,22,11,3,35,35,13
532,2021-10-19,75,63345,1185,0,3,4,0,10,3,...,10,4,4,19,11,4,8,26,36,13
533,2021-10-20,48,63400,1185,0,5,2,0,4,3,...,5,0,7,8,7,8,5,10,29,9


In [41]:
# Write the assembled daily table to a CSV file; the relative file name means it
# lands in the current working directory rather than under data_dir.
# NOTE(review): to_csv is called without index=False, so the row index is
# presumably written as an extra unnamed first column — confirm that downstream
# readers expect (or strip) it.
df_main.to_csv("daily_crime_during_COVID.csv")

In [None]:
# Notes:

In [12]:
START_DATE
REPORT_DAT
SHIFT
METHOD
OFFENSE
BLOCK
XBLOCK
YBLOCK
WARD => MAIN DEPENDENT VARIABLE (ONE-HOT ENCODED)
DISTRICT
LATITUDE
LONGITUDE
END_DATE
geometry
date
covid_daily_case_count => FEATURE 1
covid_deaths

+
metro_distance
nightlife_distance

SyntaxError: invalid syntax (Temp/ipykernel_18364/211604432.py, line 9)

In [None]:
Supervised => Prediction (variation in number of crimes), depends on location, ward, night life, covid cases, metro location
Then we look at pre-post and compare (using 2 separate models)

Check by ward and not by ward
Then test on a separate variable

When you see no difference between pre and post.

Single dataset. You augment data.

If there is a difference: then only check by ward, if not, check other variable
If there is no difference, you stop.

Add the stat model table and compare those two samples.

Start going further if there is a difference (you start doing by wards, etc.)

Kruskal-Wallis test

If you have a really low R^2, it means you are missing some feature data.

Use LASSO to drive the coefficients of uninformative features to 0.

Regression: RFR (random forest regressor).

Pre and post, measure accuracy : if there are differences in accuracies, you want to look at the variables
    In this case we use RFR, to get regression