EvalCustomGeneric

In [1]:
# Some fairly standard modules
import os, csv, lzma
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import descartes
from itertools import product
from collections import Counter

# The geopandas module does not come standard with anaconda,
# so you'll need to run the anaconda prompt as an administrator
# and install it via "conda install -c conda-forge geopandas".
# That installation will include pyproj and shapely automatically.
# These are useful modules for plotting geospatial data.
import geopandas as gpd
import pyproj
import shapely.geometry

# These modules are useful for tracking where modules are
# imported from, e.g., to check we're using our local edited
# versions of open_cp scripts.
import sys
import inspect
import importlib

# In order to use our local edited versions of open_cp
# scripts, we insert the parent directory ("..") at the
# start of our sys.path here. Note that os.path.abspath
# resolves ".." against the current working directory of
# the kernel, not against the location of this notebook
# file, so the notebook must be run from its own folder.
sys.path.insert(0, os.path.abspath(".."))

# Elements from PredictCode's custom "open_cp" package
import open_cp
import open_cp.geometry
import open_cp.plot
import open_cp.sources.chicago as chicago
import open_cp.retrohotspot as retro

In [8]:
# --- Parameters a reader is expected to tune ---

# Directory holding the input data (relative to the working directory)
datadir = os.path.join("..", "..", "Data")

# Name of the crime-records CSV inside the data directory
crime_file_name = "chicago.csv"

# Which side of Chicago to study
side_name = "South"

# --- Values derived from the parameters above ---

# Full path to the crime-records CSV
chicago_file_path = os.path.join(datadir, crime_file_name)

# NOTE(review): the call below was left disabled by the original author;
# confirm chicago.get_side() locates its boundary data without it.
#chicago.set_data_directory(datadir)

# Polygon outlining the chosen side of Chicago.
# NOTE(review): earlier notes in this notebook describe it as a shapely
# object in CRS epsg:2790 -- confirm before combining with other layers.
region_polygon = chicago.get_side(side_name)

In [16]:
def _within_period(timed_points, start, end):
    """Return the events of `timed_points` whose timestamp lies in [start, end].

    Both bounds are inclusive, exactly as in the comparison that was
    previously written out inline (twice) in this cell.
    """
    stamps = timed_points.timestamps
    return timed_points[(stamps >= start) & (stamps <= end)]


# Crime type(s) to read from the file
crime_types = {"THEFT"}

# Load all events of the chosen type(s) from the CSV
points_crime = chicago.load(chicago_file_path, crime_types)

# Time boundaries for the training and testing data
start_train = np.datetime64("2018-03-01")
end_train = np.datetime64("2018-05-01")
start_test = np.datetime64("2018-05-01")
end_test = np.datetime64("2018-06-01")

# NOTE(review): end_train equals start_test and the training window is
# inclusive at both ends, so an event stamped exactly at end_train is in
# the training set. If the test window also includes start_test, that
# event is shared between the two sets -- confirm this is intended.

# Route 1: restrict to the training period, then to the region
points_crime_train = _within_period(points_crime, start_train, end_train)
points_crime_train_region = open_cp.geometry.intersect_timed_points(
    points_crime_train, region_polygon)

print(type(points_crime))
print(type(points_crime_train))

# Route 2: restrict to the region, then to the training period
points_crime_region = open_cp.geometry.intersect_timed_points(
    points_crime, region_polygon)
points_crime_region_train = _within_period(
    points_crime_region, start_train, end_train)

print(type(points_crime_train_region))
print(type(points_crime_region_train))

# Sanity check: both routes must select the same events. np.array_equal
# reports False when the two arrays differ in length, whereas reducing an
# elementwise == with all() cannot handle that case.
print(np.array_equal(points_crime_train_region.timestamps,
                     points_crime_region_train.timestamps))
print(np.array_equal(points_crime_train_region.xcoords,
                     points_crime_region_train.xcoords))
print(np.array_equal(points_crime_train_region.ycoords,
                     points_crime_region_train.ycoords))

<class 'open_cp.data.TimedPoints'>
<class 'open_cp.data.TimedPoints'>
<class 'open_cp.data.TimedPoints'>
<class 'open_cp.data.TimedPoints'>
True
True
True


In [7]:
# Side length of each grid cell; used for both width and height
cell_size = 250

# Build a grid of square cells with zero offset and mask it against the
# study region. This uses region_polygon (defined in the configuration
# cell); the previous name ss_polygon is not defined anywhere visible in
# this notebook and raised a NameError on a fresh kernel.
masked_grid_ss = open_cp.geometry.mask_grid_by_intersection(
    region_polygon,
    open_cp.data.Grid(xsize=cell_size, ysize=cell_size, xoffset=0, yoffset=0),
)

# Count the cells whose mask entry is False. Per the original note these
# are the cells that contain relevant geometry (i.e., not the full
# rectangular grid, only relevant cells).
num_cells = np.sum(~masked_grid_ss.mask)

# Display the count as the cell's output
num_cells


961
