In [1]:
# Numerical stack. The print option below makes numpy render every float
# element of a printed array with three decimal places.
import numpy as np
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
import pandas as pd

# Extend the import path with ../src/ before importing the project module
# (presumably that is where affine_operator_win_bounds lives - verify).
import sys
sys.path.append("../src/")
import affine_operator_win_bounds as affine_ops

# Load in crime level data

In [2]:
# Read the processed violent / non-violent density tables into DataFrames
base_dir = "../data/philadelphia/"
viol_density_fn = base_dir + "viol_density.csv"
non_viol_density_fn = base_dir + "nonviol_density.csv"
viol_data = pd.read_csv(viol_density_fn, index_col=0)
non_viol_data = pd.read_csv(non_viol_density_fn, index_col=0)

# Limit to just June: keep the "<year>_06" column for each year 2006..2018,
# then drop down to plain numpy arrays
june_columns = ["%d_06" % year for year in range(2006, 2019)]
viol_data = viol_data[june_columns].to_numpy()
non_viol_data = non_viol_data[june_columns].to_numpy()

# The last column (2018) is the data to analyse; all earlier columns are history
viol_data_past, viol_data = viol_data[:, :-1], viol_data[:, -1]
non_viol_data_past, non_viol_data = non_viol_data[:, :-1], non_viol_data[:, -1]

# Mean-center the 2018 values using the overall mean of the historical data
# (a single scalar per crime type, taken over every row and year)
viol_data -= viol_data_past.mean()
non_viol_data -= non_viol_data_past.mean()

N_tracts = viol_data.shape[0]


## Assess heteroskedasticity from past data (under assumption of stationary crime levels across years)

In [3]:
# Noise variance: variance of each row across the historical years, averaged
# over rows. For simplicity the noise is modelled as homoscedastic - one
# variance shared by every tract, with separate values for violent and
# non-violent crime.
viol_var = np.var(viol_data_past, axis=1).mean()
non_viol_var = np.var(non_viol_data_past, axis=1).mean()
print("Noise Variances")
print("violent:\t%0.03f" % viol_var)
print("nonviolent:\t%0.03f" % non_viol_var)


# Signal variance: spread, across rows, of each row's historical mean
viol_signal_var = np.var(viol_data_past.mean(axis=1))
non_viol_signal_var = np.var(non_viol_data_past.mean(axis=1))

print("Variances")
print("Violent:\tNoise:%0.03f\tSignal:%0.03f" % (viol_var, viol_signal_var))
print("non-viol:\tNoise:%0.03f\tSignal:%0.03f" % (non_viol_var, non_viol_signal_var))

Noise Variances
violent:	0.246
nonviolent:	0.346
Variances
Violent:	Noise:0.246	Signal:1.018
non-viol:	Noise:0.346	Signal:1.463


# Smooth Estimates Spatially (separately for violent and non-violent crimes)

## First load in locations of tracts and compute pairwise distances

In [4]:
# Load the tract information table and pull out the longitude / latitude
# columns as numpy arrays.
# base_dir already ends with "/", so the file name is appended without a
# second separator (same convention as the density CSV paths).
tract_info_fn = base_dir + "tract_info.csv"
tract_info = pd.read_csv(tract_info_fn, index_col=0)
lon, lat = tract_info['INTPTLON10'].to_numpy(), tract_info['INTPTLAT10'].to_numpy()

In [5]:
# Pairwise distances between tract locations, built from explicit coordinate
# differences via broadcasting: entry (i, j) is the distance between tract i
# and tract j.
# The algebraically equivalent expansion x^2 + y^2 - 2xy is avoided on purpose:
# when two coordinates are nearly equal it subtracts large, almost identical
# numbers, and rounding can leave a tiny negative value that np.sqrt turns
# into NaN. Squaring the difference is always non-negative.
# NOTE(review): lat/lon are combined as if they were planar coordinates in the
# same units - confirm that this approximation is intended.
lat_dist_sqr = (lat[:, None] - lat[None, :])**2
lon_dist_sqr = (lon[:, None] - lon[None, :])**2
pairwise_dists = np.sqrt(lat_dist_sqr + lon_dist_sqr)

# Estimate with Auxiliary data

In [6]:
# First model: noise variances come from the historical estimates, and half of
# the violent-crime signal variance is assumed to be explained by the auxiliary
# (non-violent) data; the remainder is sigma_delta_sqr_1.
sigma_y_sqr = viol_var
sigma_z_sqr = non_viol_var
prop_signal_var_explained_by_aux = 1/2.
sigma_delta_sqr_1 = viol_signal_var*(1-prop_signal_var_explained_by_aux)

y = viol_data
z = non_viol_data

N = y.shape[0]

# Baseline affine estimate A y + k, which is just y itself
A = np.eye(N)
k = np.zeros(N)

# Alternative affine estimate C_1 y + l_1: y is scaled by one scalar weight and
# z enters with the complementary weight (the two weights sum to one)
total_var_1 = 2*sigma_delta_sqr_1 + sigma_y_sqr + sigma_z_sqr
C_1 = (2*sigma_delta_sqr_1 + sigma_z_sqr)/total_var_1*np.eye(N)
l_1 = (sigma_y_sqr/total_var_1)*z
Sigma = sigma_y_sqr*np.eye(N)

theta_star_1 = C_1.dot(y) + l_1

### c-value of the alternative (C_1, l_1) relative to the baseline (A, k)
c_value = affine_ops.c_value(
    y=y, A=A, b=k,
    C=C_1, d=l_1,
    Sigma=Sigma,
)
print("c_value(Affine)=%f" % c_value)

c_value(Affine)=0.999982


## Also use spatial smoothing

In [7]:
##### GP smoothing and evaluation for violent crimes
##### (y is the violent-crime data; z, set in the previous cell, is the
##### non-violent auxiliary data)
y = viol_data

### choose prior parameters and compute prior covariance
# Split of the violent-crime signal variance: one half attributed to the
# auxiliary data, one third to location, and the remainder (sigma_delta_sqr_2)
# left unexplained.
prop_var_aux = 1./2
prop_var_location = 1./3
sigma_delta_sqr_2 = viol_signal_var*(1-prop_var_aux-prop_var_location)

# Length scale: one sixth of the largest pairwise distance between tracts.
# NOTE(review): the previous comment said a tenth of the city diameter looked
# reasonable by eye, but the value actually used is a sixth - confirm which
# one is intended.
length_scale = np.max(pairwise_dists)/6.

# sigma_loc is a variance (a share of the signal variance), not a standard
# deviation. K is a squared-exponential kernel over the pairwise distances.
# The literal 0.5 (rather than 1/2) keeps the exponent correct even under
# integer division.
sigma_loc = viol_signal_var*prop_var_location
K = sigma_loc*np.exp(-0.5*pairwise_dists**2/length_scale**2)

# With M = 2K + (2*sigma_delta_sqr_2 + sigma_z_sqr) I and s = sigma_y_sqr:
#   C_2 = (I + s M^-1)^-1   = (M + s I)^-1 M
#   l_2 = (I + M / s)^-1 z  = s (M + s I)^-1 z
# M is built once and linear systems are solved directly, instead of forming
# nested explicit inverses; this is cheaper and numerically better behaved.
aux_cov = 2*K + (2*sigma_delta_sqr_2 + sigma_z_sqr)*np.eye(N)
total_cov = aux_cov + sigma_y_sqr*np.eye(N)
C_2 = np.linalg.solve(total_cov, aux_cov)
l_2 = sigma_y_sqr*np.linalg.solve(total_cov, z)

theta_star_2 = C_2.dot(y) + l_2

### compute c-value of the smoothed estimate (C_2, l_2) relative to the first
### Bayes estimate (C_1, l_1)
c_value = affine_ops.c_value(y=y, A=C_1, b=l_1, C=C_2, d=l_2, Sigma=Sigma)
print("c_value =%f"%c_value)

c_value =0.843140


## Now try directly computing a c-value for the third estimate compared to the MLE.

In [8]:
# Spatially smoothed estimate (C_2, l_2) compared directly with the
# identity estimate (A, k)
c_value = affine_ops.c_value(
    y=y, A=A, b=k,
    C=C_2, d=l_2,
    Sigma=Sigma,
)
print("c_value =%f" % c_value)

### Same comparison between the two Bayes estimates as before, but with their
### roles swapped: (C_2, l_2) is now the baseline and (C_1, l_1) the alternative
c_value = affine_ops.c_value(
    y=y, A=C_2, b=l_2,
    C=C_1, d=l_1,
    Sigma=Sigma,
)
print("other order for two bayes ests, c_value =%f" % c_value)

  r = _zeros._bisect(f, a, b, xtol, rtol, maxiter, args, full_output, disp)


c_value =0.999422
other order for two bayes ests, c_value =0.000000
