In [20]:
import pandas as pd
import pickle
import numpy as np
from scipy.ndimage import gaussian_filter

In [11]:
# Load the pickled test data (later passed to from_coord_to_matrix as its `df` argument).
# The path is relative to the notebook's working directory.
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open('../raw_data/test_df.pickle', 'rb') as f:
            test_df = pickle.load(f)

In [26]:
class Matrix:
    """Small container holding one sigma value per axis (x, y, z)."""

    def __init__(self, x, y, z):
        # Store the three inputs under explicit sigma_* attribute names.
        self.sigma_x, self.sigma_y, self.sigma_z = x, y, z

In [27]:
# Sample instance with hand-picked sigma values (x=5, y=4, z=8).
matrix = Matrix(x=5, y=4, z=8)

In [28]:
# Display the z sigma stored on the sample instance (the constructor's `z` argument).
matrix.sigma_z

8

In [12]:
def from_meters_to_steps(lat_meters, lon_meters, lat=40):
    '''
    Returns the latitude and longitude step (in decimal degrees) to use for the grid buckets

    lat_meters, lon_meters are defined in trainer.py
    They are equivalent to the bucket size desired (in meters)

    lat is the reference latitude, in decimal degrees, at which the
    east-west distance is converted to degrees of longitude. It defaults
    to 40, the value that used to be hard-coded here.

    Returns the tuple (lat_step, lon_step).
    '''

    # Earth's radius (sphere), in meters
    R = 6378137

    # Coordinate offsets in radians.
    # A degree of longitude gets shorter away from the equator, hence the
    # cos(latitude) factor on the east-west offset.
    dLat = lat_meters / R
    dLon = lon_meters / (R * np.cos(np.pi * lat / 180))

    # Offsets converted from radians to decimal degrees
    latO = dLat * 180 / np.pi
    lonO = dLon * 180 / np.pi

    return latO, lonO

In [13]:
def from_coord_to_matrix(df, lat_meters, lon_meters): #lat_meters, lon_meters
    """
    Returns 3D matrix indexed as (latitude bucket, longitude bucket, time index)

    Each coordinate is assigned to a bucket of size lat_meters and lon_meters.
    A cell is set to 1 when at least one row of df falls into it; several rows
    landing in the same cell still yield 1 (this is an occupancy grid, not a count).

    df must provide the 'latitude', 'longitude' and 'six_hour_date' columns.
    It is only read, never modified.

    Raises ValueError when df has no rows.
    """

    if len(df) == 0:
        raise ValueError("df has no rows: cannot build a matrix from an empty set of coordinates")

    # Time index: rank of each distinct 'six_hour_date' value in sorted order
    ind = {time: index for index, time in enumerate(np.sort(df['six_hour_date'].unique()))}
    time_index = df['six_hour_date'].map(ind)

    # Matrix starting point: highest latitude / lowest longitude found in df.
    # Latitude is negated so that the row index grows as latitude decreases.
    grid_offset = np.array([-df['latitude'].max(), df['longitude'].min(), 0])

    # Converts bucket size (meters) to lat & lon spacing
    lat_spacing, lon_spacing = from_meters_to_steps(lat_meters, lon_meters)

    # Euclidean spacing (one time index per step on the third axis)
    grid_spacing = np.array([lat_spacing, lon_spacing, 1])

    # Gets point coordinates as an (n_rows, 3) array, built column-wise
    # instead of one Python tuple per row
    coords = np.column_stack((-df['latitude'].to_numpy(),
                              df['longitude'].to_numpy(),
                              time_index.to_numpy()))

    # Converts point to index
    indexes = np.round((coords - grid_offset) / grid_spacing).astype('int')

    X = indexes[:, 0]
    Y = indexes[:, 1]
    Z = indexes[:, 2]

    # 75th precinct maximum & minimum points (decimal degrees)
    lat_min, lat_max = 40.6218192717505, 40.6951504231971
    lon_min, lon_max = -73.90404639808888, -73.83559344190869

    lat_diff = lat_max - lat_min # Distance in lat that makes up width of precinct 75
    lon_diff = lon_max - lon_min # Distance in lon that makes up width of precinct 75

    # Dim 1: distance of precinct in lat / lat_spacing
    # Dim 2: distance of precinct in lon / lon_spacing
    # Dim 3: one slice per distinct time index
    # NOTE(review): the shape comes from the precinct box while the origin
    # comes from df, so rows outside the box can raise IndexError below.
    img3D_non_conv = np.zeros((np.round(lat_diff / lat_spacing).astype('int') + 1,
                               np.round(lon_diff / lon_spacing).astype('int') + 1,
                               Z.max() + 1))

    img3D_non_conv[X, Y, Z] = 1

    return img3D_non_conv

In [18]:
# Build the unfiltered 3D grid from the test data with lat_meters = lon_meters = 15.
img3D_non_conv = from_coord_to_matrix(test_df, 15, 15)

In [16]:
def getting_sigma_values(raw_x, raw_y, raw_z, lat_meters, lon_meters):
    '''
    Returns the (sigma_x, sigma_y, sigma_z) tuple used in gaussian_filter

    raw_x and raw_y are first divided by the bucket size (lat_meters and
    lon_meters respectively); raw_z is taken as is. Each of the three
    results is then halved.
    '''
    # Express the x / y values in buckets; z needs no rescaling
    in_buckets = (raw_x / lat_meters, raw_y / lon_meters, raw_z)

    return tuple(component / 2 for component in in_buckets)

In [19]:
# Raw values 120 / 120 / 12 with 15 m buckets give sigmas of (4.0, 4.0, 6.0).
sigma_x, sigma_y, sigma_z = getting_sigma_values(120, 120, 12, 15, 15)

In [21]:
def gaussian_filtering(img3D_non_conv, sigma_x, sigma_y, sigma_z):
    '''
    Returns img3D_non_conv after scipy's gaussian_filter has been applied,
    using sigma_x, sigma_y and sigma_z for the first, second and third axis.
    '''
    per_axis_sigma = (sigma_x, sigma_y, sigma_z)
    return gaussian_filter(img3D_non_conv, sigma=per_axis_sigma)

In [22]:
# Smooth the grid with the per-axis sigmas computed in the previous step.
img3D_conv = gaussian_filtering(img3D_non_conv, sigma_x, sigma_y, sigma_z)

In [None]:
def stacking(self, window, lat_step, lon_step, time_step):
    '''
    Returns stacked crimes

    Every non-zero cell of the 3D array `window` is moved to a coarser grid
    by dividing its (i, j, k) index by (lat_step, lon_step, time_step) and
    rounding. Values that land in the same coarse cell are summed.

    The result has shape (192, 132, Z.max() + 2), where Z.max() is the
    largest coarse time index that received a value.

    Raises ValueError when window has no non-zero cell.
    NOTE(review): a window too large for the 192 x 132 grid at the given
    steps raises IndexError.
    '''
    grid_offset = np.array([0, 0, 0]) # Where do you start

    # new steps from precise grid
    grid_spacing = np.array([lat_step, lon_step, time_step])

    # get points coordinates and their values (both in C order, so row i of
    # coords matches values[i])
    coords = np.argwhere(window)
    values = window[window != 0]

    if len(coords) == 0:
        raise ValueError("window contains no non-zero value to stack")

    # Convert point to index
    indexes = np.round((coords - grid_offset) / grid_spacing).astype('int')
    X = indexes[:, 0]
    Y = indexes[:, 1]
    Z = indexes[:, 2]

    # virgin matrix: 192 x 132 is the fixed spatial size expected downstream
    stacked_crimes = np.zeros((192, 132, Z.max() + 2))

    # Unbuffered accumulation: repeated (X, Y, Z) triples are each added,
    # which a plain `stacked_crimes[X, Y, Z] += values` would not do.
    np.add.at(stacked_crimes, (X, Y, Z), values)

    return stacked_crimes

In [None]:
def get_observation_target_train(self,
                       obs_timeframe,obs_lat,obs_lon, obs_time,
                       target_timeframe,  tar_lat,tar_lon, tar_time):
    '''
    Outputs one (observation, target) pair cut from self.img3D_conv_train.

    A window of obs_timeframe + (self.raw_z + 1) + target_timeframe
    consecutive slices is drawn at a random position along the third axis.
    Its first obs_timeframe slices form the observation and its last
    target_timeframe slices form the target; the slices in between are
    left unused as a gap.

    obs_timeframe, target_timeframe : number of slices along the third axis
        (NOTE(review): the previous docstring gave the unit as hours - confirm)
    obs_lat, obs_lon, obs_time : steps passed to self.stacking_train for the observation
    tar_lat, tar_lon, tar_time : steps passed to self.stacking_train for the target

    Raises ValueError when self.img3D_conv_train has fewer slices than the window needs.
    '''

    # sample length to absorb impact of gaussian time sigma
    sample_length = obs_timeframe + (self.raw_z + 1) + target_timeframe

    n_slices = self.img3D_conv_train.shape[2]
    if n_slices < sample_length:
        raise ValueError(
            f'img3D_conv_train has {n_slices} time slices but {sample_length} are needed')

    # finds starting position
    # randint's upper bound is exclusive: the + 1 keeps the last valid start
    # (n_slices - sample_length) reachable, including when the series is
    # exactly sample_length long.
    position = np.random.randint(0, n_slices - sample_length + 1)

    # samples in train and test dfs
    subsample = self.img3D_conv_train[:, :, position : position + sample_length]

    # divide the subsample in X and y
    observations = subsample[:, :, : obs_timeframe]

    targets = subsample[:, :, - target_timeframe : ]

    # stacked images
    observation = self.stacking_train(observations, obs_lat, obs_lon, obs_time)

    target = self.stacking_train(targets, tar_lat, tar_lon, tar_time)

    return observation, target

def get_X_y_train(self, nb_observations_train, obs_tf,obs_lat,obs_lon, obs_time,
                tar_tf, tar_lat,tar_lon, tar_time):
    '''
    Draws nb_observations_train (observation, target) pairs with
    self.get_observation_target_train and stacks them into two arrays.

    The arrays are stored on self.X_train / self.y_train and also returned.
    '''
    pairs = []
    for n in range(nb_observations_train):
        print(f'Creating observation {n} out of {nb_observations_train}')
        X_subsample, y_subsample = self.get_observation_target_train(
            obs_tf, obs_lat, obs_lon, obs_time,
            tar_tf, tar_lat, tar_lon, tar_time)
        pairs.append((X_subsample, y_subsample))

    # Split the collected pairs back into one array per role
    self.X_train = np.array([observation for observation, _ in pairs])
    self.y_train = np.array([target for _, target in pairs])

    return self.X_train, self.y_train