In [4]:
import numpy as np
import pandas as pd
import os

# Generate from raw data

In [5]:
# Load the 2019 crime records from CSV and preview the first rows.
data_path = "data_2019.csv"
data = pd.read_csv(data_path)
data.head()

Unnamed: 0,ID,Date,Primary Type,Latitude,Longitude,Community Area
0,11864018,09/24/2019 08:00:00 AM,DECEPTIVE PRACTICE,41.852248,-87.623786,33.0
1,11859805,10/13/2019 08:30:00 PM,THEFT,41.895732,-87.687784,24.0
2,11863808,10/05/2019 06:30:00 PM,THEFT,41.882002,-87.662287,28.0
3,11859727,10/13/2019 07:00:00 PM,CRIMINAL DAMAGE,41.946987,-87.669164,6.0
4,11859656,10/13/2019 02:10:00 PM,ASSAULT,41.975838,-87.659854,3.0


In [6]:
# Number of records for each "Primary Type" value, most frequent first
primary_type_counts = data["Primary Type"].value_counts()
primary_type_counts

THEFT                                62484
BATTERY                              49513
CRIMINAL DAMAGE                      26681
ASSAULT                              20619
DECEPTIVE PRACTICE                   18975
OTHER OFFENSE                        16794
NARCOTICS                            15069
BURGLARY                              9639
MOTOR VEHICLE THEFT                   8977
ROBBERY                               7994
CRIMINAL TRESPASS                     6818
WEAPONS VIOLATION                     6339
OFFENSE INVOLVING CHILDREN            2413
INTERFERENCE WITH PUBLIC OFFICER      1546
PUBLIC PEACE VIOLATION                1520
SEX OFFENSE                           1357
CRIM SEXUAL ASSAULT                    913
CRIMINAL SEXUAL ASSAULT                738
PROSTITUTION                           681
HOMICIDE                               508
ARSON                                  376
LIQUOR LAW VIOLATION                   232
STALKING                               226
CONCEALED C

In [7]:
# The eight crime categories that are kept. The position of a category in this
# list is also its index on the crime-type axis of the count matrices.
types_to_keep = [
    'THEFT',
    'BATTERY',
    'CRIMINAL DAMAGE',
    'NARCOTICS',
    'ASSAULT',
    'DECEPTIVE PRACTICE',
    'BURGLARY',
    'ROBBERY',
]

# Drop every record whose primary type is not one of the kept categories.
keep_mask = data["Primary Type"].isin(types_to_keep)
data = data[keep_mask]
data["Primary Type"].value_counts()

THEFT                 62484
BATTERY               49513
CRIMINAL DAMAGE       26681
ASSAULT               20619
DECEPTIVE PRACTICE    18975
NARCOTICS             15069
BURGLARY               9639
ROBBERY                7994
Name: Primary Type, dtype: int64

In [8]:
# Count crimes per (day of year, community area, crime type).
# Shape is 365 x 77 x 8:
#   axis 0 - day of the year (index 0 is January 1)
#   axis 1 - community area (index = area number - 1)
#   axis 2 - crime type, in the order given by types_to_keep
crime_matrix = np.zeros((365, 77, 8))

# Build the three index arrays for all rows at once instead of parsing each
# date and searching types_to_keep inside a per-row Python loop.
type_position = {crime_type: position for position, crime_type in enumerate(types_to_keep)}
day_idx = pd.to_datetime(data["Date"]).dt.dayofyear.to_numpy() - 1
# NOTE(review): a "Community Area" of 0 would become index -1, which numpy
# wraps to the last area (77) - confirm the data contains no such rows.
area_idx = data["Community Area"].astype(int).to_numpy() - 1
type_idx = data["Primary Type"].map(type_position).astype(int).to_numpy()

# np.add.at accumulates correctly when the same (day, area, type) cell occurs
# many times; a fancy-indexed "+= 1" would count repeated cells only once.
np.add.at(crime_matrix, (day_idx, area_idx, type_idx), 1)

crime_matrix.shape

(365, 77, 8)

In [6]:
# Counts for the community area at index 0: one row per day, one column per crime type
crime_matrix[:, 0, :]

array([[2., 4., 4., ..., 3., 0., 1.],
       [3., 2., 1., ..., 0., 1., 0.],
       [1., 4., 3., ..., 1., 0., 0.],
       ...,
       [2., 2., 3., ..., 0., 0., 0.],
       [2., 0., 5., ..., 0., 0., 0.],
       [1., 1., 0., ..., 0., 1., 0.]])

In [7]:
# store the crime counts region-wise in text files (r_0.txt ... r_76.txt)
# 8 rows x 365 columns
# each row corresponds to a crime type
# each column corresponds to a day of the year

save_dir = "AIST_Custom_DATASET/Train/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

# crime_matrix is rebound with other shapes elsewhere in this notebook; make
# sure the (day, area, type) matrix with 365 days is the one in memory.
assert crime_matrix.shape == (365, 77, 8), "expected the (365, 77, 8) daily matrix, got %s" % (crime_matrix.shape,)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in crime_matrix[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

In [8]:
# Read the counts of region 0 back from its text file into a numpy array.
# Layout of the file: 8 rows (one per crime type) x 365 columns (one per day of the year).
save_dir = "AIST_Custom_DATASET/Train/"

region_0_file = f"{save_dir}r_0.txt"
r0 = np.loadtxt(region_0_file)
r0.shape

(8, 365)

# Generate test sets from raw data

In [16]:
data_path = "christmas_2020_all_crimes.csv"
data = pd.read_csv(data_path)

types_to_keep = ['THEFT', 'BATTERY', 'CRIMINAL DAMAGE', 'NARCOTICS', 'ASSAULT', 'DECEPTIVE PRACTICE', 'BURGLARY', 'ROBBERY']
data = data[data["Primary Type"].isin(types_to_keep)]

# 32 days (dec 15 to jan 15) x 77 community areas x 8 crime types
crime_matrix = np.zeros((32, 77, 8))

dates = pd.to_datetime(data["Date"])

# Zero-based position of each record inside the dec 15 to jan 15 window:
#   Dec 15 -> 0, ..., Dec 31 -> 16, Jan 1 -> 17, ..., Jan 15 -> 31.
# The offset is used directly as the index. Subtracting a further 1 would send
# Dec 15 to index -1, which numpy wraps to the last day of the window, and
# would place every other day one slot too early.
day_idx = np.where(dates.dt.month == 12, dates.dt.day - 15, dates.dt.day + 31 - 15)

# Negative indices wrap around silently in numpy, so reject them explicitly.
if ((day_idx < 0) | (day_idx >= 32)).any():
    raise ValueError("found records dated outside the Dec 15 - Jan 15 window")

type_position = {crime_type: position for position, crime_type in enumerate(types_to_keep)}
area_idx = data["Community Area"].astype(int).to_numpy() - 1
type_idx = data["Primary Type"].map(type_position).astype(int).to_numpy()

# np.add.at accumulates correctly when the same (day, area, type) cell repeats.
np.add.at(crime_matrix, (day_idx, area_idx, type_idx), 1)

# store the crime counts region-wise in text files
# 8 rows x 32 columns
# each row corresponds to a crime type
# each column corresponds to a day of the window

save_dir = "AIST_Custom_DATASET/Test_Christmas/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in crime_matrix[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

In [None]:
# NOTE(review): this cell repeats the Christmas export of the preceding cell
# (same input file, same output folder) and was never executed; it looks safe
# to delete - confirm.
data_path = "christmas_2020_all_crimes.csv"
data = pd.read_csv(data_path)

types_to_keep = ['THEFT', 'BATTERY', 'CRIMINAL DAMAGE', 'NARCOTICS', 'ASSAULT', 'DECEPTIVE PRACTICE', 'BURGLARY', 'ROBBERY']
data = data[data["Primary Type"].isin(types_to_keep)]

# 32 days (dec 15 to jan 15) x 77 community areas x 8 crime types
crime_matrix = np.zeros((32, 77, 8))

dates = pd.to_datetime(data["Date"])

# Zero-based position of each record inside the dec 15 to jan 15 window:
#   Dec 15 -> 0, ..., Dec 31 -> 16, Jan 1 -> 17, ..., Jan 15 -> 31.
# The offset is used directly as the index. Subtracting a further 1 would send
# Dec 15 to index -1, which numpy wraps to the last day of the window, and
# would place every other day one slot too early.
day_idx = np.where(dates.dt.month == 12, dates.dt.day - 15, dates.dt.day + 31 - 15)

# Negative indices wrap around silently in numpy, so reject them explicitly.
if ((day_idx < 0) | (day_idx >= 32)).any():
    raise ValueError("found records dated outside the Dec 15 - Jan 15 window")

type_position = {crime_type: position for position, crime_type in enumerate(types_to_keep)}
area_idx = data["Community Area"].astype(int).to_numpy() - 1
type_idx = data["Primary Type"].map(type_position).astype(int).to_numpy()

# np.add.at accumulates correctly when the same (day, area, type) cell repeats.
np.add.at(crime_matrix, (day_idx, area_idx, type_idx), 1)

# store the crime counts region-wise in text files
# 8 rows x 32 columns
# each row corresponds to a crime type
# each column corresponds to a day of the window

save_dir = "AIST_Custom_DATASET/Test_Christmas/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in crime_matrix[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

In [17]:
data_path = "./easter_2020_all_crimes.csv"
data = pd.read_csv(data_path)

types_to_keep = ['THEFT', 'BATTERY', 'CRIMINAL DAMAGE', 'NARCOTICS', 'ASSAULT', 'DECEPTIVE PRACTICE', 'BURGLARY', 'ROBBERY']
data = data[data["Primary Type"].isin(types_to_keep)]

# 21 days (april 16 to may 6) x 77 community areas x 8 crime types
crime_matrix = np.zeros((21, 77, 8))

dates = pd.to_datetime(data["Date"])

# Zero-based position of each record inside the april 16 to may 6 window:
#   Apr 16 -> 0, ..., Apr 30 -> 14, May 1 -> 15, ..., May 6 -> 20.
# The offset is used directly as the index. Subtracting a further 1 would send
# Apr 16 to index -1, which numpy wraps to the last day of the window, and
# would place every other day one slot too early.
day_idx = np.where(dates.dt.month == 4, dates.dt.day - 16, dates.dt.day + 30 - 16)

# Negative indices wrap around silently in numpy, so reject them explicitly.
if ((day_idx < 0) | (day_idx >= 21)).any():
    raise ValueError("found records dated outside the Apr 16 - May 6 window")

type_position = {crime_type: position for position, crime_type in enumerate(types_to_keep)}
area_idx = data["Community Area"].astype(int).to_numpy() - 1
type_idx = data["Primary Type"].map(type_position).astype(int).to_numpy()

# np.add.at accumulates correctly when the same (day, area, type) cell repeats.
np.add.at(crime_matrix, (day_idx, area_idx, type_idx), 1)

# store the crime counts region-wise in text files
# 8 rows x 21 columns
# each row corresponds to a crime type
# each column corresponds to a day of the window

save_dir = "AIST_Custom_DATASET/Test_easter/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in crime_matrix[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

In [19]:
data_path = "./data_2020_upto_MARCH.csv"
data = pd.read_csv(data_path)

types_to_keep = ['THEFT', 'BATTERY', 'CRIMINAL DAMAGE', 'NARCOTICS', 'ASSAULT', 'DECEPTIVE PRACTICE', 'BURGLARY', 'ROBBERY']
data = data[data["Primary Type"].isin(types_to_keep)]

# 91 days x 77 community areas x 8 crime types
crime_matrix = np.zeros((91, 77, 8))

# Build the three index arrays for all rows at once instead of parsing each
# date and searching types_to_keep inside a per-row Python loop.
type_position = {crime_type: position for position, crime_type in enumerate(types_to_keep)}
# day of the year is 1-based, the matrix index is 0-based
day_idx = pd.to_datetime(data["Date"]).dt.dayofyear.to_numpy() - 1
area_idx = data["Community Area"].astype(int).to_numpy() - 1
type_idx = data["Primary Type"].map(type_position).astype(int).to_numpy()

# np.add.at accumulates correctly when the same (day, area, type) cell repeats.
np.add.at(crime_matrix, (day_idx, area_idx, type_idx), 1)

# store the crime counts region-wise in text files
# 8 rows x 91 columns
# each row corresponds to a crime type
# each column corresponds to a day of the year

save_dir = "AIST_Custom_DATASET/Test_upto_march/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in crime_matrix[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

# Generate Side Crimes

In [15]:
# Nine groups of community-area numbers (1-based). Every area from 1 to 77
# appears in exactly one group; the position of a group in this list is its
# index on axis 1 of side_crimes.
sides = [
    [1, 2, 3, 4, 77, 9, 10, 11, 12, 13, 14, 76],
    [15, 16, 17, 18, 19, 20],
    [5, 6, 7, 21, 22],
    [23, 24, 25, 26, 27, 28, 29, 30, 31],
    [8, 32, 33],
    [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 60, 69],
    [56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68],
    [70, 71, 72, 73, 74, 75],
    [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55],
]

# crime_matrix is rebound with other shapes elsewhere in this notebook; make
# sure the (day, area, type) matrix with 365 days is the one in memory.
assert crime_matrix.shape == (365, 77, 8), "expected the (365, 77, 8) daily matrix, got %s" % (crime_matrix.shape,)

side_crimes = np.zeros((365, 9, 8))

# group the crimes by side and add them up
for side_idx, areas in enumerate(sides):
    # area numbers are 1-based, matrix indices are 0-based
    member_idx = [area - 1 for area in areas]
    # select the member areas and sum over the area axis -> (365, 8)
    side_crimes[:, side_idx, :] = crime_matrix[:, member_idx, :].sum(axis=1)

side_crimes

array([[[ 9., 18.,  9., ..., 19.,  3.,  3.],
        [10., 11.,  4., ...,  4.,  2.,  0.],
        [16., 12.,  8., ..., 14.,  2.,  1.],
        ...,
        [11., 29., 14., ..., 18.,  3.,  6.],
        [ 4., 12.,  9., ..., 18.,  2.,  2.],
        [17., 35., 19., ..., 21.,  2.,  5.]],

       [[17.,  7.,  2., ..., 10.,  3.,  0.],
        [ 4.,  3.,  3., ...,  4.,  3.,  3.],
        [15.,  7.,  0., ...,  6.,  5.,  3.],
        ...,
        [18.,  9.,  9., ...,  6.,  4.,  0.],
        [ 7.,  7.,  7., ...,  5.,  0.,  0.],
        [12., 21., 13., ...,  7.,  3.,  4.]],

       [[24., 11., 11., ..., 10.,  2.,  1.],
        [ 8.,  3.,  4., ...,  3.,  3.,  4.],
        [16.,  1.,  6., ...,  4.,  2.,  0.],
        ...,
        [31., 25., 13., ...,  4.,  1.,  5.],
        [ 9.,  9.,  7., ...,  5.,  3.,  1.],
        [16., 19., 10., ...,  4., 10.,  2.]],

       ...,

       [[19.,  8., 10., ...,  6.,  1.,  1.],
        [ 6.,  5.,  6., ...,  2.,  2.,  1.],
        [22.,  6.,  1., ...,  2.,  1.,  2.

In [20]:
# Store the side-level counts in text files (s_0.txt ... s_8.txt).
# 8 rows x 365 columns: each row is a crime type, each column a day of the year.
save_dir = "AIST_HOLIDAY_DATASET/Side Crime/"
# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

# Fail early if a differently shaped side_crimes is still in memory.
assert side_crimes.shape == (365, 9, 8), "expected the (365, 9, 8) side matrix, got %s" % (side_crimes.shape,)

for i in range(9):
    with open(save_dir + "s_" + str(i) + ".txt", "w") as f:
        # side_crimes[:, i, :] is (day, type); transpose to get one crime type per row.
        for type_counts in side_crimes[:, i, :].T:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

# Generate from existing AIST dataset

In [10]:
# Read the 77 per-region files (r_0.txt ... r_76.txt) into one array laid out
# as (crime type, region, time step): 8 x 77 x (365 * 6).
path = "../AIST/aist-main/data/chicago/com_crime/"

n_types, n_regions, n_steps = 8, 77, 365 * 6
crime_matrix = np.zeros((n_types, n_regions, n_steps))

for region in range(n_regions):
    region_file = f"{path}r_{region}.txt"
    crime_matrix[:, region, :] = np.loadtxt(region_file)

crime_matrix.shape

(8, 77, 2190)

In [11]:
# Add up each run of 6 consecutive time steps, turning the 8 x 77 x 2190
# array into an 8 x 77 x 365 array.

# Split the last axis into (365 groups, 6 steps per group) and sum each group.
crime_matrix_6 = crime_matrix.reshape(8, 77, 365, 6).sum(axis=3)

crime_matrix_6.shape

(8, 77, 365)

In [12]:
# makedirs with exist_ok=True also creates the parent folder and does not fail
# when the cell is re-run after the folder already exists.
os.makedirs("Comm_wise_data/aist_1d_from_existing/", exist_ok=True)

In [13]:
# save the crime counts region-wise in text files (r_0.txt ... r_76.txt)
# 8 rows x 365 columns
# each row corresponds to a crime type
# each column corresponds to a day of the year

save_dir = "Comm_wise_data/aist_1d_from_existing/"
# Create the output folder if needed so this cell also works on its own.
os.makedirs(save_dir, exist_ok=True)

# Write the summed array crime_matrix_6 (8 x 77 x 365), not crime_matrix:
# crime_matrix still holds all 365 * 6 time steps, so indexing it with
# 0..364 would export only its first 365 time steps.
assert crime_matrix_6.shape == (8, 77, 365), "expected the (8, 77, 365) summed matrix, got %s" % (crime_matrix_6.shape,)

for i in range(77):
    with open(save_dir + "r_" + str(i) + ".txt", "w") as f:
        # crime_matrix_6[:, i, :] is (type, day): one crime type per row already.
        for type_counts in crime_matrix_6[:, i, :]:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")

# Generate Side Crimes from AIST dataset for 1 day

In [16]:
# Read the 9 per-side files (s_0.txt ... s_8.txt) into one array laid out
# as (crime type, side, time step): 8 x 9 x (365 * 6).
path = "../AIST/aist-main/data/chicago/side_crime/"

n_types, n_sides, n_steps = 8, 9, 365 * 6
side_crime_matrix = np.zeros((n_types, n_sides, n_steps))

for side in range(n_sides):
    side_file = f"{path}s_{side}.txt"
    side_crime_matrix[:, side, :] = np.loadtxt(side_file)

side_crime_matrix.shape

(8, 9, 2190)

In [18]:
# Add up each run of 6 consecutive time steps, turning the 8 x 9 x 2190
# array into an 8 x 9 x 365 array.

# Split the last axis into (365 groups, 6 steps per group) and sum each group.
side_crime_matrix_6 = side_crime_matrix.reshape(8, 9, 365, 6).sum(axis=3)

side_crime_matrix_6.shape

(8, 9, 365)

In [20]:
# makedirs with exist_ok=True also creates the parent folder and does not fail
# when the cell is re-run after the folder already exists.
os.makedirs("Comm_wise_data/aist_1d_from_existing_side/", exist_ok=True)

In [21]:
# save the crime counts side-wise in text files (s_0.txt ... s_8.txt)
# 8 rows x 365 columns
# each row corresponds to a crime type
# each column corresponds to a day of the year

save_dir = "Comm_wise_data/aist_1d_from_existing_side/"
# Create the output folder if needed so this cell also works on its own.
os.makedirs(save_dir, exist_ok=True)

# Write the summed array side_crime_matrix_6 (8 x 9 x 365), not
# side_crime_matrix: side_crime_matrix still holds all 365 * 6 time steps, so
# indexing it with 0..364 would export only its first 365 time steps.
assert side_crime_matrix_6.shape == (8, 9, 365), "expected the (8, 9, 365) summed matrix, got %s" % (side_crime_matrix_6.shape,)

for i in range(9):
    with open(save_dir + "s_" + str(i) + ".txt", "w") as f:
        # side_crime_matrix_6[:, i, :] is (type, day): one crime type per row already.
        for type_counts in side_crime_matrix_6[:, i, :]:
            # every value is followed by one space, including the last value of the line
            f.write("".join(str(int(count)) + " " for count in type_counts))
            f.write("\n")