In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import os
from itertools import product
import pickle

from urban_growth.model import *
from urban_growth.simulator import *
from urban_growth.estimator import *

%matplotlib inline

# Construct the Control Frame

In [2]:
# Each non-mask CSV in data/ is one map snapshot named <CITY><YEAR>.csv
# (three-letter city code followed by a four-digit year).
files = os.listdir('data')

# Keep only CSV maps: stray entries (hidden files, checkpoints) would make
# pd.to_numeric below raise. Sorting makes the row order reproducible,
# since os.listdir returns entries in arbitrary order.
maps = sorted(name for name in files
              if name.endswith('.csv') and 'mask' not in name)
control_df = pd.DataFrame({'file_name' : maps})

control_df['city'] = control_df.file_name.str[0:3]
control_df['year_begin'] = pd.to_numeric(control_df.file_name.str[3:7])

# Only snapshots before 2011 are kept as period starts.
control_df = control_df[control_df.year_begin < 2011]

def get_year_end(year):
    """Return the end year of the period that begins at `year`.

    Periods beginning in 1990 or 2000 span ten years; any other start
    year gets a five-year period.
    """
    span = 10 if year in (1990, 2000) else 5
    return year + span
    
# Elementwise wrapper so get_year_end can be applied to a whole column.
v_gye = np.vectorize(get_year_end)

# .assign builds a new frame instead of writing into the filtered slice,
# which avoids pandas' SettingWithCopyWarning. The axis is passed by keyword
# because the positional form of DataFrame.drop was deprecated and later
# removed.
control_df = (control_df
              .assign(year_end = v_gye(control_df.year_begin))
              .drop('file_name', axis = 1))

In [3]:
# delete this to run full suite
# (restricts the control frame to the rows whose city code is 'LAG')
control_df = control_df.loc[control_df['city'] == 'LAG']

# Shared Parameters

In [4]:
# kernel params, probably won't loop over these
L = 15
base_val = 0.4
unit = 3.333  # pixels per km
K = distance_kernel(L, base_val, unit)

# Data processing params, no need to touch
N_pix = 100

# initial parameters, might need to loop over these
par_0 = dict(alpha = np.array([100, 30]),
             gamma = np.array([1.2, 2.5]),
             beta  = -25)

# Blur Grid Search

## Parameters

In [5]:
# thresholding params, will loop over these
# sigma = np.linspace(0, 5, 26)
# t     = np.linspace(0, .5, 10)

# Single-point grids for a quick run: each evaluates to array([0.]).
sigma = np.linspace(start = 0, stop = 5, num = 1)
t     = np.linspace(start = 0, stop = 0.5, num = 1)

## Define Grid Search

In [6]:
def grid_search_blur(city, year_begin, year_end, sigma, t, par_0):
    """Fit the model for every (sigma, t) combination for one city and period.

    Reads ``data/<city><year_begin>.csv``, ``data/<city><year_end>.csv`` and
    ``data/<city>_mask.csv``, then runs a maximum-likelihood fit for each
    pair drawn from ``sigma`` x ``t``. Relies on the module-level ``K`` and
    ``N_pix``.

    Parameters
    ----------
    city : str
        City code used as the file-name prefix.
    year_begin, year_end : int
        Years of the initial and final maps.
    sigma, t : iterable
        Values forwarded to ``estimator`` as ``sigma`` and ``t``.
    par_0 : dict
        Initial parameters handed to ``estimator.ml``.

    Returns
    -------
    pandas.DataFrame
        One row per (sigma, t) pair holding the fitted parameters, the
        log-likelihood, the covariance entry ``res[2]``, the AIC and the
        settlement / background rates.
    """
    def read_grid(file_name):
        # All inputs are comma-separated float grids under data/.
        return np.loadtxt('data/' + file_name, dtype=float, delimiter=',')

    # get and clean data for given city and years
    M0  = read_grid(city + str(year_begin) + '.csv')
    M1  = read_grid(city + str(year_end) + '.csv')
    geo = read_grid(city + '_mask.csv')

    # The mask file is inverted before being multiplied into both maps.
    geo = 1 - geo
    M0 = M0 * geo
    M1 = M1 * geo

    # Force the final map to be at least the initial map everywhere.
    M1 = np.maximum(M0, M1)

    # One list per output column, created in the order of the result frame.
    column_names = ['city', 'year_begin', 'year_end', 'sigma', 't',
                    'par_0', 'par', 'll', 'cov', 'N_eff', 'n_pars',
                    'AIC', 'settlement_rate', 'bg_rate']
    columns = {}
    for name in column_names:
        columns[name] = []

    # computations
    for sig, tau in product(sigma, t):
        print(city + ', ' + str(year_begin) + '-' + str(year_end) + ' : sigma = ' + str(sig) + ' , t = ' + str(tau))

        e = estimator(M0 = M0, geo = geo,  N_pix = N_pix, sigma = sig, t = tau)
        res = e.ml(M1, K, par_0, opts = {'disp' : False}, use_grad = True)
        fitted = res[0]

        # Parameter count used in the AIC: 4 when t == 0, otherwise 7.
        n_pars = 4 if tau == 0 else 7

        ll = e.log_likelihood(K = K, M1 = M1, pars = fitted)

        N_eff = ((1 - M0) * geo * N_pix).sum()
        settlement_rate = (M1 - M0).sum() * N_pix / N_eff
        bg_rate = expit(fitted['beta'])

        record = (city, year_begin, year_end, sig, tau,
                  par_0, fitted, ll, res[2], N_eff, n_pars,
                  2 * (n_pars - ll), settlement_rate, bg_rate)
        for name, value in zip(column_names, record):
            columns[name].append(value)

    return pd.DataFrame(columns)

## Run Grid Search

In [7]:
# Run the blur grid search once per row of the control frame and stack
# the per-period result frames.
blur_jobs = zip(control_df.city.values,
                control_df.year_begin.values,
                control_df.year_end.values)

out = pd.concat([grid_search_blur(job_city, job_begin, job_end, sigma, t, par_0)
                 for job_city, job_begin, job_end in blur_jobs])

LAG, 2000-2010 : sigma = 0.0 , t = 0.0


  lls           =   self.N_pix*((M1-self.M0)*np.log(p) + (1-M1)*np.log(1-p))
  lls           =   self.N_pix*((M1-self.M0)*np.log(p) + (1-M1)*np.log(1-p))
  grad_coefs    =   self.N_pix*((M1-self.M0)/p-(1-M1)/(1-p))
  return k * np.log(p) + (n - k) * np.log(1 - p) + np.log(special.binom(n, k))
  return k * np.log(p) + (n - k) * np.log(1 - p) + np.log(special.binom(n, k))


LAG, 2010-2015 : sigma = 0.0 , t = 0.0


  k   = K ** (-gamma)
  c_deriv = - alpha * (d_convd * denom - d_denom * convd) / (denom ** 2)
  a_deriv = convd / denom
  for i in range(2)
  return np.nanmean(lls), np.nanmean(grad, axis = (1, 2))


## Save Output

In [9]:
out_path = "throughput/out_blur.p"

# Pickle the results frame `out`. The previous code passed `out_path` to
# pickle.dump, so the saved file contained only the path string.
# The `with` block closes the file even if pickling fails.
with open(out_path, 'wb') as out_file:
    pickle.dump(out, out_file)

# Threshold Grid Search

In [10]:
# thresholding params, will loop over these
# q = np.linspace(0, 1, 11)

# Two-point grid for a quick run: evaluates to array([0., 5.]).
q = np.linspace(start = 0, stop = 5, num = 2)

In [13]:
def grid_search_thresh(city, year_begin, year_end, thresh_vec, par_0):
    """Fit the model over pixel thresholds and cluster-size thresholds.

    For each pixel threshold ``q`` in ``thresh_vec`` a copy of the initial
    map is zeroed wherever it is below ``q`` and its positive region is
    labelled with ``morphology.label``. The sum of the map over each label
    is rounded, and every distinct value is used as a size threshold ``T``
    for an ``estimator`` built with ``class_type = 'thresh'``. Relies on
    the module-level ``K`` and ``N_pix``.

    Parameters
    ----------
    city : str
        City code used as the file-name prefix under ``data/``.
    year_begin, year_end : int
        Years of the initial and final maps.
    thresh_vec : iterable
        Pixel thresholds ``q`` to try.
    par_0 : dict
        Initial parameters handed to ``estimator.ml``.

    Returns
    -------
    pandas.DataFrame
        One row per fitted (q, T) pair with columns ``city``,
        ``year_begin``, ``year_end``, ``q``, ``T``, ``par_0``, ``par``,
        ``ll``, ``cov``, ``N_eff``, ``n_pars``, ``AIC``,
        ``settlement_rate`` and ``bg_rate``.
    """
    # get and clean data for given city and years
    M0  = np.loadtxt('data/' + city + str(year_begin) + '.csv', dtype=float, delimiter=',')
    M1  = np.loadtxt('data/' + city + str(year_end) + '.csv',   dtype=float, delimiter=',')
    geo = np.loadtxt('data/' + city + '_mask.csv',   dtype=float, delimiter=',')

    # The mask file is inverted before being multiplied into both maps.
    geo = 1 - geo
    M0 = M0 * geo
    M1 = M1 * geo

    # Force the final map to be at least the initial map everywhere.
    M1  = np.maximum(M0, M1)

    # initialize lists
    city_vec       = []
    year_begin_vec = []
    year_end_vec   = []
    q_vec          = []
    T_vec          = []
    par_0_vec      = []
    par_vec        = []
    ll_vec         = []
    cov_vec        = []
    N_eff_vec      = []
    n_pars_vec     = []
    AIC_vec        = []
    rate_vec       = []
    bg_rate_vec    = []

    for q in thresh_vec:
        # Work on a copy so the unthresholded M0 stays available below.
        M = M0.copy()
        M[M < q] = 0

        # `morphology` is not imported explicitly in this notebook; it
        # presumably arrives through the star imports -- TODO confirm.
        morph = morphology.label(M > 0)
        labels = np.unique(morph)
        label_sums = {lab : M[np.where(morph == lab)].sum() for lab in labels}

        # list(...) keeps this working when .values() is a dict view
        # (Python 3) rather than a list (Python 2).
        size_thresh = np.unique(np.round(list(label_sums.values())))

        for T in size_thresh:
            print(city + ', ' + str(year_begin) + '-' + str(year_end) + ' : T = ' + str(T) + ' , q = ' + str(q))

            e = estimator(M0 = M, geo = geo, N_pix = N_pix, thresh = T, class_type = 'thresh')
            res = e.ml(M1, K, par_0, opts = {'disp' : False}, use_grad = True)

            # Parameter count used in the AIC: 4 when q == 0, otherwise 7.
            if q == 0:
                n_pars = 4
            else:
                n_pars = 7

            ll = e.log_likelihood(K = K, M1 = M1, pars = res[0])

            # NOTE(review): N_eff and the settlement rate use the
            # unthresholded M0 while the estimator is fit on M -- confirm
            # this is intended.
            N_eff = ((1 - M0) * geo * N_pix).sum()
            settlement_rate = (M1 - M0).sum() * N_pix / N_eff
            bg_rate = expit(res[0]['beta'])

            # updates to storage lists
            city_vec.append(city)
            year_begin_vec.append(year_begin)
            year_end_vec.append(year_end)
            q_vec.append(q)
            T_vec.append(T)
            par_0_vec.append(par_0)
            par_vec.append(res[0])
            ll_vec.append(ll)
            cov_vec.append(res[2])
            N_eff_vec.append(N_eff)
            n_pars_vec.append(n_pars)
            AIC_vec.append(2 * (n_pars - ll))
            rate_vec.append(settlement_rate)
            bg_rate_vec.append(bg_rate)

    # The grid coordinates of this search are q and T. The previous code
    # referenced sig_vec / t_vec, which do not exist in this function, and
    # raised NameError after all fits had finished.
    df = pd.DataFrame({
        'city'       : city_vec,
        'year_begin' : year_begin_vec,
        'year_end'   : year_end_vec,
        'q'          : q_vec,
        'T'          : T_vec,
        'par_0'      : par_0_vec,
        'par'        : par_vec,
        'll'         : ll_vec,
        'cov'        : cov_vec,
        'N_eff'      : N_eff_vec,
        'n_pars'     : n_pars_vec,
        'AIC'        : AIC_vec,
        'settlement_rate' : rate_vec,
        'bg_rate' : bg_rate_vec
    })

    return df

In [14]:
# Run the threshold grid search once per row of the control frame and
# stack the per-period result frames.
out_thresh = pd.concat([grid_search_thresh(control_df.city.iloc[i],
                                        control_df.year_begin.iloc[i],
                                        control_df.year_end.iloc[i],
                                        q,
                                        par_0) for i in range(len(control_df))])

out_path = "throughput/out_thresh.p"

# Pickle the results frame `out_thresh`. The previous code passed
# `out_path` to pickle.dump, so the saved file contained only the path
# string. The `with` block closes the file even if pickling fails.
with open(out_path, 'wb') as out_file:
    pickle.dump(out_thresh, out_file)

LAG, 2000-2010 : T = 0.0 , q = 0.0
LAG, 2000-2010 : T = 1.0 , q = 0.0


  grad_coefs    =   self.N_pix*((M1-self.M0)/p-(1-M1)/(1-p))
  grad = grad_coefs*grad


LAG, 2000-2010 : T = 2.0 , q = 0.0
LAG, 2000-2010 : T = 3.0 , q = 0.0
LAG, 2000-2010 : T = 4.0 , q = 0.0


KeyboardInterrupt: 