In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys, os, time, math, csv
import itertools
import collections

import numpy as np
np.warnings.filterwarnings('ignore')
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

import scipy.optimize

from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR
from sklearn.model_selection import KFold

import MigrationModels
import MigrationEvaluationMethods

#simple-maps
sys.path.append(os.path.join(os.getcwd(),"simple-maps"))
from simplemaps.SimpleFigures import simpleMap, differenceMap

In [39]:
import keras
import keras.backend as K
from keras.optimizers import SGD, Adam
from keras.models import Model
from keras.layers import Dense, Dropout, AlphaDropout, Input, BatchNormalization, Activation
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib

# Initial setup

In [2]:
years = range(2004, 2015)  # data years 2004 through 2014, inclusive

In [3]:
# County boundary shapefile and the attribute used as its key. Neither name is
# referenced again in the visible cells (presumably consumed by the simple-maps
# plotting helpers -- TODO confirm).
shapefile_fn = "data/intermediate/boundary_shapefiles/cb_2015_us_county_500k.shp"
shapefile_key = "GEOID"

# Load data

### Migration data

In [4]:
# One migration matrix per entry of `years`; rows are later sliced by origin
# index and columns by destination index (see get_full_dataset).
migration_matrices = [
    np.load("data/processed/migration/migration_matrix_%d.npy" % (year))
    for year in years
]

# Zero the diagonal in place so flows from a county to itself are ignored.
for migration_matrix in migration_matrices:
    np.fill_diagonal(migration_matrix, 0.0)

# One county identifier per line; the line order defines the county index used
# to address every matrix and vector in this notebook.
with open("data/processed/county_intersection_list_2004_2014.txt") as f:
    county_list = f.read().strip().split("\n")

county_fips_to_idx = {fips: idx for idx, fips in enumerate(county_list)}
num_counties = len(county_list)

In [5]:
# One county identifier per line for the hurricane-affected ("flooded") counties.
with open("data/processed/hurricane_affected_counties.csv", "r") as f:
    flooded_counties = f.read().strip().split("\n")

# Set lookup keeps the complement computation linear in the number of counties.
flooded_county_set = set(flooded_counties)
unflooded_counties = [fips for fips in county_list if fips not in flooded_county_set]

# Positions of each group within `county_list` (raises KeyError if a flooded
# county is missing from the county list).
flooded_county_idxs = np.array([county_fips_to_idx[fips] for fips in flooded_counties])
unflooded_county_idxs = np.array([county_fips_to_idx[fips] for fips in unflooded_counties])

all_county_idxs = np.arange(num_counties)

### Per county features

In [6]:
# Population table keyed by FIPS (read as strings so the codes are not parsed as numbers).
population_by_year = pd.read_csv(
    "data/processed/county_population_2004_2014.csv",
    dtype={"FIPS": str},
).set_index("FIPS")

# One (num_rows, 1) column vector per year, taken from the POPESTIMATE<year> columns.
population_vectors_by_year = [
    population_by_year["POPESTIMATE%d" % (year)].values.reshape(-1, 1)
    for year in years
]

### Extra features

In [7]:
# Distance matrix addressed by county index on both axes (it is sliced with
# origin and destination index lists in get_full_dataset); units are not
# visible in this notebook.
distances = np.load("data/processed/county_distance_matrix.npy")

In [8]:
# One intervening-opportunities matrix per year, computed from that year's
# population vector and the shared distance matrix.
intervening_population_opportunities = [
    MigrationModels.getInterveningOpportunities(yearly_population, distances)
    for yearly_population in population_vectors_by_year
]

# Experiments

In [9]:
def get_full_dataset(year_idx, origin_list, destination_list, model=None):
    """Collect the model inputs for one year, restricted to the given counties.

    Args:
        year_idx: position in `years` selecting which yearly arrays to use.
        origin_list: county indices used as origins (rows).
        destination_list: county indices used as destinations (columns).
        model: optional model name, stored unchanged under the "model" key.

    Returns:
        dict with float arrays "origin_pop", "destination_pop", "S"
        (intervening opportunities), "D" (distances) and "T" (observed flows),
        plus "beta" and "model". "beta" is the coefficient of a no-intercept
        linear regression of each origin's total outgoing flow on its population.
    """
    def _origin_by_destination(matrix):
        # Keep the origin rows, then the destination columns.
        return matrix[origin_list, :][:, destination_list].astype(float)

    populations = population_vectors_by_year[year_idx]
    origin_pop = populations[origin_list, :].astype(float)
    destination_pop = populations[destination_list, :].astype(float)

    S = _origin_by_destination(intervening_population_opportunities[year_idx])
    D = _origin_by_destination(distances)
    T = _origin_by_destination(migration_matrices[year_idx])

    regression = LinearRegression(fit_intercept=False)
    regression.fit(origin_pop, T.sum(axis=1))

    return {
        "origin_pop": origin_pop,
        "destination_pop": destination_pop,
        "S": S,
        "D": D,
        "T": T,
        "beta": regression.coef_[0],
        "model": model,
    }

In [25]:
def get_pairs_from_full_dataset(args):
    """Flatten an origin-by-destination dataset into one row per county pair.

    Args:
        args: dict as returned by get_full_dataset; the keys "origin_pop",
            "destination_pop", "S", "D" and "T" are read. The population
            arrays may be shaped (n,) or (n, 1).

    Returns:
        (X, Y) where, for origin i and destination j, row ``i * num_cols + j``
        of X is ``[origin_pop[i], destination_pop[j], D[i, j], S[i, j]]`` and
        the same position of Y is ``T[i, j]``. X has shape
        (num_rows * num_cols, 4); both arrays are float.
    """
    origin_pop = np.asarray(args["origin_pop"], dtype=float)
    destination_pop = np.asarray(args["destination_pop"], dtype=float)

    num_rows, num_cols = origin_pop.shape[0], destination_pop.shape[0]

    def _flatten_pairs(matrix):
        # Row-major flattening puts entry (i, j) at position i * num_cols + j.
        return np.asarray(matrix, dtype=float)[:num_rows, :num_cols].reshape(-1)

    # Vectorised replacement for the per-pair Python loop: each origin value is
    # repeated across its destinations, and the destination values are cycled
    # once per origin.
    X = np.column_stack([
        np.repeat(origin_pop.reshape(num_rows), num_cols),
        np.tile(destination_pop.reshape(num_cols), num_rows),
        _flatten_pairs(args["D"]),
        _flatten_pairs(args["S"]),
    ])
    # np.array copies, so Y never aliases the caller's T matrix.
    Y = np.array(_flatten_pairs(args["T"]), dtype=float)

    return X, Y

In [53]:
def run_traditional_models(alpha, args):
    """Predict flows with the traditional model named in ``args["model"]``.

    Args:
        alpha: model parameter passed to the extended radiation and gravity
            models; not used by the plain radiation model ("rad").
        args: dict as returned by get_full_dataset. "model" must be one of
            "extrad", "rad", "gravpow" or "gravexp".

    Returns:
        (T, T_pred): the observed flow matrix from `args` and the predicted
        flows, obtained by row-normalising the model output and passing it to
        MigrationModels.productionFunction with ``args["beta"]``.

    Raises:
        ValueError: if ``args["model"]`` is not a supported model name.
    """
    model = args["model"]
    origin_pop, destination_pop = args["origin_pop"], args["destination_pop"]
    S, D, T = args["S"], args["D"], args["T"]
    beta = args["beta"]

    if model == "extrad":
        P = MigrationModels.extendedRadiationModel(origin_pop, destination_pop, S, alpha)
    elif model == "rad":
        P = MigrationModels.radiationModel(origin_pop, destination_pop, S)
    elif model == "gravpow":
        P = MigrationModels.gravityModel(origin_pop, destination_pop, D, alpha, decay="power")
    elif model == "gravexp":
        P = MigrationModels.gravityModel(origin_pop, destination_pop, D, alpha, decay="exponential")
    else:
        # Without this branch an unknown name only surfaced as an unbound `P`.
        raise ValueError(
            "Unknown migration model %r; expected one of 'extrad', 'rad', 'gravpow', 'gravexp'" % (model,)
        )

    P = MigrationModels.row_normalize(P)
    T_pred = MigrationModels.productionFunction(origin_pop, P, beta=beta)

    return T, T_pred

def fit_traditional_models(alpha, args):
    """Objective for scipy.optimize.minimize: the negated CPC score.

    Runs the model selected in `args` with the candidate `alpha` and returns
    minus MigrationEvaluationMethods.cpc(observed, predicted), so minimising
    this value maximises CPC.
    """
    observed, predicted = run_traditional_models(alpha, args)
    return -MigrationEvaluationMethods.cpc(observed, predicted)
    
def evaluate_traditional_models(alpha, beta, args):
    """Score a fitted traditional model on the dataset in `args`.

    Args:
        alpha: fitted model parameter.
        beta: production coefficient to use in place of ``args["beta"]``.
        args: dict as returned by get_full_dataset; it is not modified.

    Returns:
        Whatever MigrationEvaluationMethods.evaluate_all returns for the
        observed flows, the predicted flows and the distance matrix.
    """
    # Override beta on a shallow copy so the caller's dict keeps its own value.
    eval_args = dict(args, beta=beta)
    T, T_pred = run_traditional_models(alpha, eval_args)

    return MigrationEvaluationMethods.evaluate_all(T, T_pred, args["D"])

## Traditional models

### Flooded to Unflooded

In [18]:
# Names understood by run_traditional_models, in evaluation order.
models = ["gravexp", "gravpow", "rad", "extrad"]

In [15]:
# Leave-one-out evaluation over the flooded origin counties: for each held-out
# county, fit on the remaining flooded counties and score on the held-out one,
# always with the unflooded counties as destinations. Year index 1 (the second
# entry of `years`) is used throughout.
print("Flooded to Unflooded")
flooded_to_unflooded_cv = {}
for model in models:
    print(model)
    results = []

    for county in flooded_county_idxs:
        t_test_counties = [county]
        t_train_counties = list(set(flooded_county_idxs) - set(t_test_counties))

        # Train: alpha minimises the negated CPC within [0, 3]; beta comes from
        # the training origins.
        args = get_full_dataset(1, t_train_counties, unflooded_county_idxs, model=model)
        opt_result = scipy.optimize.minimize(
            fit_traditional_models, x0=[1.0], args=args, bounds=[[0, 3]]
        )
        alpha, beta = opt_result.x[0], args["beta"]

        # Test
        args = get_full_dataset(1, t_test_counties, unflooded_county_idxs, model=model)
        scores = evaluate_traditional_models(alpha, beta, args)

        results.append((alpha, beta) + tuple(scores[k] for k in range(6)))

    flooded_to_unflooded_cv[model] = results

Flooded to Unflooded
gravexp
	gravexp,0.0044,0.408,0.449,7.742,0.200,7.742,0.200


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	gravexp,0.0050,0.500,0.524,38.544,0.429,38.544,0.429
	gravexp,0.0048,0.084,0.084,0.685,-0.027,0.685,-0.027
	gravexp,0.0047,0.331,0.352,1.500,0.418,1.500,0.418


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	gravexp,0.0047,0.370,0.417,0.942,0.275,0.942,0.275
	gravexp,0.0048,0.565,0.591,12.487,0.638,12.487,0.638
	gravexp,0.0047,0.465,0.512,6.174,0.214,6.174,0.214
gravpow


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	gravpow,1.5511,0.344,0.384,8.571,0.267,8.571,0.267
	gravpow,1.8035,0.448,0.473,42.580,-0.008,42.580,-0.008
	gravpow,1.6082,0.156,0.156,0.631,0.202,0.631,0.202
	gravpow,1.5992,0.258,0.274,1.664,0.347,1.664,0.347
	gravpow,1.6073,0.366,0.402,0.948,0.449,0.948,0.449
	gravpow,1.6358,0.569,0.603,12.387,0.432,12.387,0.432
	gravpow,1.5697,0.478,0.535,6.016,0.468,6.016,0.468
rad
	rad,1.0000,0.571,0.590,5.604,0.734,5.604,0.734
	rad,1.0000,0.347,0.360,50.346,-0.364,50.346,-0.364
	rad,1.0000,0.674,0.674,0.244,0.742,0.244,0.742
	rad,1.0000,0.459,0.485,1.213,0.334,1.213,0.334
	rad,1.0000,0.470,0.481,0.793,-1.432,0.793,-1.432
	rad,1.0000,0.418,0.430,16.713,-0.562,16.713,-0.562
	rad,1.0000,0.346,0.354,7.547,-1.032,7.547,-1.032
extrad


  return P / P.sum(axis=1, keepdims=True)


	extrad,0.8393,0.551,0.573,5.866,0.673,5.866,0.673
	extrad,0.2366,0.499,0.527,38.605,0.353,38.605,0.353
	extrad,0.1229,0.509,0.509,0.367,0.800,0.367,0.800
	extrad,0.1231,0.325,0.342,1.513,0.440,1.513,0.440
	extrad,0.1242,0.441,0.480,0.837,0.564,0.837,0.564


  return P / P.sum(axis=1, keepdims=True)


	extrad,0.1232,0.589,0.621,11.811,0.601,11.811,0.601
	extrad,0.1129,0.503,0.552,5.732,0.573,5.732,0.573


In [16]:
# Mean (std) of every result column per model. Each stored row is
# (alpha, beta, six evaluation scores), so the header names both fitted
# parameters before the score columns.
print("Migration Model,Best Alpha,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
for model, values in flooded_to_unflooded_cv.items():
    print(model, end=",")
    values = np.array(values)
    means = values.mean(axis=0)
    stds = values.std(axis=0)

    for mean, std in zip(means, stds):
        print("%0.4f (%0.4f)," % (mean, std), end="")
    print("")

Migration Model,Best Alpha,CPC,CPC_d,MAE,R2,MAE,R2
rad,1.0000 (0.0000),0.1674 (0.0326),0.4692 (0.1104),0.4820 (0.1087),11.7799 (16.6184),-0.2258 (0.7936),11.7799 (16.6184),-0.2258 (0.7936),
gravexp,0.0047 (0.0002),0.1674 (0.0326),0.3889 (0.1446),0.4184 (0.1544),9.7246 (12.4281),0.3068 (0.1964),9.7246 (12.4281),0.3068 (0.1964),
gravpow,1.6250 (0.0772),0.1674 (0.0326),0.3742 (0.1288),0.4038 (0.1414),10.3996 (13.7485),0.3081 (0.1584),10.3996 (13.7485),0.3081 (0.1584),
extrad,0.2403 (0.2478),0.1674 (0.0326),0.4882 (0.0790),0.5149 (0.0820),9.2474 (12.5452),0.5719 (0.1355),9.2474 (12.5452),0.5719 (0.1355),


### All to all

In [20]:
# Five-fold cross-validation over origin counties (all counties as
# destinations), repeated for every year; each fold stores the per-year
# average of (alpha, beta, six evaluation scores).
all_to_all_cv = {}
for model in models:
    print(model)
    results = []

    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(all_county_idxs):
        t_train_counties = all_county_idxs[train_index]
        t_test_counties = all_county_idxs[test_index]

        year_results = []
        for i, year in enumerate(years):
            # Train
            args = get_full_dataset(i, t_train_counties, all_county_idxs, model=model)
            opt_result = scipy.optimize.minimize(
                fit_traditional_models, x0=[1.0], args=args, bounds=[[0, 3]]
            )
            alpha, beta = opt_result.x[0], args["beta"]

            # Test
            args = get_full_dataset(i, t_test_counties, all_county_idxs, model=model)
            scores = evaluate_traditional_models(alpha, beta, args)

            year_results.append((alpha, beta) + tuple(scores[k] for k in range(6)))

        year_results = np.array(year_results)
        fold_mean = year_results.mean(axis=0)
        print("\t", fold_mean)
        results.append(fold_mean)
    all_to_all_cv[model] = results

gravexp


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 2.12698724e-02  3.56647768e-02  3.40786679e-01  4.06317877e-01
  2.19109070e+00 -7.30525613e+00  5.77072191e+02  1.39107742e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [1.31539888e-02 3.17404956e-02 3.84495738e-01 5.00448427e-01
 8.76477160e-01 1.56739529e-01 2.81091308e+02 5.80862081e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.43949812e-02  3.12666424e-02  4.27273974e-01  6.05016503e-01
  1.17631935e+00 -6.36860962e-01  3.67668071e+02  5.67612908e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.28959416e-02  3.19591616e-02  3.78948982e-01  4.86161103e-01
  1.04251004e+00 -1.98299429e-01  2.85513315e+02  6.78842189e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.00000000e+00  3.11123357e-02  1.03158193e-03  1.25396716e-03
  1.93426593e+00 -1.07102841e+01  3.66958808e+02  5.77509102e-01]
gravpow
	 [ 2.78543828e+00  3.56647768e-02  5.16550170e-01  6.47748906e-01
  1.60873072e+00 -1.71528523e+00  5.72329189e+02  4.01854149e-01]
	 [2.59533821e+00 3.17404956e-02 5.73267885e-01 8.82634665e-01
 6.09498817e-01 6.26338657e-01 2.21677063e+02 7.03893336e-01]
	 [2.69156750e+00 3.12666424e-02 5.43767224e-01 8.46278840e-01
 9.38042482e-01 1.21852884e-01 3.60312214e+02 3.20245543e-01]
	 [2.58789362e+00 3.19591616e-02 5.83793236e-01 8.57170495e-01
 6.99407294e-01 5.91621077e-01 2.37988283e+02 7.67576256e-01]
	 [ 2.77456844e+00  3.11123357e-02  5.44753032e-01  7.64165523e-01
  8.81064207e-01 -1.20390507e-01  3.22427185e+02  7.25555573e-01]
rad
	 [ 1.00000000e+00  3.56647768e-02  4.23899584e-01  5.32665309e-01
  1.91664118e+00 -7.93308460e-01  4.81179399e+02  7.93493186e-01]
	 [ 1.00000000e+00  3.17404956e-02  4.44516369e-01  7.11054857e-01
  7.91867221e-

  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [ 3.83692394e-01  3.56647768e-02  4.98209947e-01  6.33605995e-01
  1.66679216e+00 -2.50671426e-02  4.41202747e+02  8.20360161e-01]


  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [3.35761014e-01 3.17404956e-02 4.88790104e-01 7.44538284e-01
 7.27528201e-01 2.86173433e-01 2.15002675e+02 8.53804025e-01]


  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [3.54324342e-01 3.12666424e-02 4.90078316e-01 7.50945360e-01
 1.04653839e+00 1.50163850e-01 2.97640323e+02 8.65307573e-01]


  return P / P.sum(axis=1, keepdims=True)


	 [3.39164951e-01 3.19591616e-02 4.93185462e-01 7.34160918e-01
 8.50183970e-01 1.91005622e-01 2.36220909e+02 8.33371330e-01]


  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [3.56894044e-01 3.11123357e-02 4.94766823e-01 7.08537180e-01
 9.77089364e-01 1.35366654e-01 2.51176908e+02 9.12361953e-01]


In [None]:
# Mean (std) across folds of every result column per model. Each stored row is
# (alpha, beta, six evaluation scores), so the header names both fitted
# parameters before the score columns.
print("Migration Model,Best Alpha,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
for model, values in all_to_all_cv.items():
    print(model, end=",")
    values = np.array(values)
    means = values.mean(axis=0)
    stds = values.std(axis=0)

    for mean, std in zip(means, stds):
        print("%0.4f (%0.4f)," % (mean, std), end="")
    print("")

### Unflooded to unflooded

In [19]:
%%time
# Five-fold cross-validation over the unflooded origin counties (unflooded
# counties as destinations), repeated for every year; each fold stores the
# per-year average of (alpha, beta, six evaluation scores).
unflooded_to_unflooded_cv = {}
for model in models:
    print(model)
    results = []

    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(unflooded_county_idxs):
        t_train_counties = unflooded_county_idxs[train_index]
        t_test_counties = unflooded_county_idxs[test_index]

        year_results = []
        for i, year in enumerate(years):
            # Train
            args = get_full_dataset(i, t_train_counties, unflooded_county_idxs, model=model)
            opt_result = scipy.optimize.minimize(
                fit_traditional_models, x0=[1.0], args=args, bounds=[[0, 3]]
            )
            alpha, beta = opt_result.x[0], args["beta"]

            # Test
            args = get_full_dataset(i, t_test_counties, unflooded_county_idxs, model=model)
            scores = evaluate_traditional_models(alpha, beta, args)

            year_results.append((alpha, beta) + tuple(scores[k] for k in range(6)))

        year_results = np.array(year_results)
        fold_mean = year_results.mean(axis=0)
        print("\t", fold_mean)
        results.append(fold_mean)
    unflooded_to_unflooded_cv[model] = results

gravexp


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 2.13551790e-02  3.54176120e-02  3.40364356e-01  4.05680964e-01
  2.19999435e+00 -7.29953674e+00  5.76868710e+02  1.43453625e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [1.31167840e-02 3.16508438e-02 3.82730953e-01 4.95983160e-01
 8.42031197e-01 1.61899607e-01 2.70995248e+02 5.81904562e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.43711423e-02  3.11315263e-02  4.27231845e-01  6.06152433e-01
  1.18043044e+00 -6.30833533e-01  3.68286713e+02  5.70772860e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.28481547e-02  3.18267211e-02  3.79115708e-01  4.85532577e-01
  1.04393845e+00 -1.89195518e-01  2.87943108e+02  6.80800016e-01]


  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)
  denominator = np.exp(d*alpha)


	 [ 1.00000000e+00  3.10205157e-02  1.03430583e-03  1.25787453e-03
  1.92175471e+00 -1.05402183e+01  3.56528668e+02  5.93282472e-01]
gravpow
	 [ 2.79119670e+00  3.54176120e-02  5.16706037e-01  6.47590312e-01
  1.61373364e+00 -1.71481103e+00  5.72809037e+02  4.02665212e-01]
	 [2.59541302e+00 3.16508438e-02 5.75166087e-01 8.82652749e-01
 5.80451145e-01 6.51521193e-01 2.10637771e+02 7.21002255e-01]
	 [2.69291593e+00 3.11315263e-02 5.44380724e-01 8.48284427e-01
 9.39970616e-01 1.24222649e-01 3.60143957e+02 3.24063953e-01]
	 [2.58842967e+00 3.18267211e-02 5.84633273e-01 8.57138755e-01
 6.99136630e-01 5.92813863e-01 2.38456270e+02 7.69520795e-01]
	 [ 2.77420927e+00  3.10205157e-02  5.45241913e-01  7.63427506e-01
  8.74450866e-01 -9.51167386e-02  3.18946343e+02  7.32812562e-01]
rad
	 [ 1.00000000e+00  3.54176120e-02  4.23981633e-01  5.32577593e-01
  1.92292022e+00 -7.89622116e-01  4.80695290e+02  7.94036088e-01]
	 [ 1.00000000e+00  3.16508438e-02  4.45323721e-01  7.15191359e-01
  7.57101514e-

  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [ 3.84139013e-01  3.54176120e-02  4.98224630e-01  6.33171393e-01
  1.67245848e+00 -2.50903380e-02  4.41252598e+02  8.20487972e-01]


  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [3.35312206e-01 3.16508438e-02 4.90433197e-01 7.43383802e-01
 6.94506953e-01 2.91018808e-01 2.02560345e+02 8.69829328e-01]


  return P / P.sum(axis=1, keepdims=True)
  return P / P.sum(axis=1, keepdims=True)


	 [3.53464209e-01 3.11315263e-02 4.90090194e-01 7.51610673e-01
 1.05009346e+00 1.51723274e-01 2.99537341e+02 8.65810106e-01]


  return P / P.sum(axis=1, keepdims=True)


KeyboardInterrupt: 

In [None]:
# Mean (std) across folds of every result column per model. Each stored row is
# (alpha, beta, six evaluation scores), so the header names both fitted
# parameters before the score columns.
print("Migration Model,Best Alpha,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
for model, values in unflooded_to_unflooded_cv.items():
    print(model, end=",")
    values = np.array(values)
    means = values.mean(axis=0)
    stds = values.std(axis=0)

    for mean, std in zip(means, stds):
        print("%0.4f (%0.4f)," % (mean, std), end="")
    print("")

## ML Models

In [81]:
def cpc_loss(y_true, y_pred):
    """Keras loss equal to ``1 - 2 * sum(min(y_true, y_pred)) / (sum(y_true) + sum(y_pred))``.

    The sums run over every element of the tensors passed in, so the value is
    computed once for the whole batch rather than per sample.
    """
    overlap = K.sum(K.minimum(y_true, y_pred))
    total = K.sum(y_true) + K.sum(y_pred)
    return 1.0 - (2.0 * overlap) / total

def baseline_model():
    """Build and compile the feed-forward regressor used for the ML experiments.

    The network takes 4 input features, has two 128-unit ReLU hidden layers and
    a single ReLU output, and is trained with Adam (learning rate 0.001) on
    cpc_loss. It reports "mse" and the CPC loss as metrics.

    Returns:
        The compiled keras Model.
    """
    def cpcLoss(y_true, y_pred):
        # The original code passed an undefined name `cpcLoss` as a metric.
        # Keras labels a function metric by the function's __name__, and the
        # training cells use EarlyStopping(monitor="cpcLoss"), so the wrapper
        # keeps that exact name while delegating to cpc_loss.
        return cpc_loss(y_true, y_pred)

    inputs = Input(shape=(4,))
    x = inputs

    x = Dense(128, activation="relu")(x)
    x = Dense(128, activation="relu")(x)

    outputs = Dense(1, activation="relu")(x)

    model = Model(inputs=[inputs], outputs=[outputs])
    optimizer = Adam(lr=0.001)

    model.compile(loss=cpc_loss, metrics=["mse", cpcLoss], optimizer=optimizer)
    return model

In [78]:
def evaluate_ml_models(model, x_test, y_test, beta, args):
    """Score a fitted regressor on the origin-by-destination dataset in `args`.

    Args:
        model: fitted estimator exposing ``predict``.
        x_test: pair features with one row per (origin, destination) pair in
            row-major order, so predictions can be reshaped to
            (num_origins, num_destinations).
        y_test: unused; the observed flows are read from ``args["T"]``.
        beta: production coefficient passed to MigrationModels.productionFunction.
        args: dict as returned by get_full_dataset for the test counties.

    Returns:
        Whatever MigrationEvaluationMethods.evaluate_all returns for the
        observed flows, the predicted flows and the distance matrix.
    """
    origin_pop, destination_pop = args["origin_pop"], args["destination_pop"]
    S, D, T = args["S"], args["D"], args["T"]

    n, m = origin_pop.shape[0], destination_pop.shape[0]

    try:
        P = model.predict(x_test, batch_size=2**14).reshape(n, m)
    except TypeError:
        # Estimators whose predict() takes no batch_size keyword reject the
        # call above with TypeError; any other failure is allowed to propagate
        # instead of being silently retried.
        P = model.predict(x_test).reshape(n, m)
    P = MigrationModels.row_normalize(P)
    P[P<0] = 0
    T_pred  = MigrationModels.productionFunction(origin_pop, P, beta=beta)
    # NaNs in the prediction (for example from a row that sums to zero during
    # normalisation) are scored as zero flow.
    T_pred[np.isnan(T_pred)] = 0

    return MigrationEvaluationMethods.evaluate_all(T, T_pred, D)

### Flooded to Unflooded

In [79]:
# Leave-one-out evaluation of the neural model over the flooded origin
# counties: train on the remaining flooded counties, test on the held-out one,
# always with the unflooded counties as destinations and year index 1.
# Note that `flooded_to_unflooded_cv` is rebound here from the dict built in
# the traditional-model section to a list.
print("Flooded to Unflooded")
flooded_to_unflooded_cv = []
model = "dl"

for county in flooded_county_idxs:

    t_train_counties = list(set(flooded_county_idxs) - set([county]))
    t_test_counties = [county]

    # Load train
    args = get_full_dataset(1, t_train_counties, unflooded_county_idxs, model=model)
    beta = args["beta"]
    x_train, y_train = get_pairs_from_full_dataset(args)

    # y_train_binary is not used anywhere in the visible cells.
    y_train_binary = y_train.copy().astype(int)
    mask = y_train>0

    positive_indices = np.where(mask)[0]
    negative_indices = np.where(~mask)[0]

    num_positive = positive_indices.shape[0]
    num_negative = negative_indices.shape[0]
    # NOTE(review): with max() the zero-flow pairs are never reduced -- they are
    # resampled (with replacement) to at least their original count, or to
    # 40x the positives if that is larger. If the intent was to cap negatives
    # at 40x positives this would need min(); confirm.
    new_num_negative = max(negative_indices.shape[0], 40*num_positive)

    # NOTE(review): no random seed is set in the visible cells, so this
    # resampling differs between runs.
    negative_indices = np.random.choice(negative_indices, size=new_num_negative, replace=True)

    new_indices = np.concatenate([
        positive_indices,
        negative_indices
    ])

    x_train = x_train[new_indices]
    y_train = y_train[new_indices]

    # The scaler is fitted on the resampled training pairs only and reused for
    # the test pairs below.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)


    K.clear_session()
    # Early stopping watches the metric named "cpcLoss"; fit() below receives no
    # validation data, so this is a training metric.
    early_stopping = EarlyStopping(monitor="cpcLoss", patience=100, restore_best_weights=True)
    t_model = keras.wrappers.scikit_learn.KerasRegressor(
        build_fn=baseline_model,
        epochs=500,
        batch_size=2**12,
        callbacks=[early_stopping],
        verbose=0
    )
    history = t_model.fit(x_train, y_train)


    # Test
    args = get_full_dataset(1, t_test_counties, unflooded_county_idxs, model=model)
    x_test, y_test = get_pairs_from_full_dataset(args)
    x_test = scaler.transform(x_test)

    scores = evaluate_ml_models(t_model, x_test, y_test, beta, args)

    # Each stored row is (beta, six evaluation scores); there is no alpha column here.
    print("%s,%0.4f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f" % (model, beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))
    flooded_to_unflooded_cv.append((beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))

Flooded to Unflooded
dl,0.1679,0.410,0.334,6.137,0.332,6.137,0.332
dl,0.0997,0.422,0.308,31.455,0.128,31.455,0.128
dl,0.1691,0.291,0.355,0.646,-0.060,0.646,-0.060
dl,0.1692,0.428,0.522,1.564,-1.164,1.564,-1.164
dl,0.1696,0.337,0.761,2.226,-2.736,2.226,-2.736
dl,0.2203,0.000,0.020,46.792,-0.320,46.792,-0.320
dl,0.1758,0.350,0.527,11.039,-0.152,11.039,-0.152


In [80]:
# Mean (std) over the leave-one-out folds. Rows in flooded_to_unflooded_cv are
# (beta, six evaluation scores), so the first value column is Beta, not alpha.
print("Migration Model,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
print(model, end=",")
values = np.array(flooded_to_unflooded_cv)
means = values.mean(axis=0)
stds = values.std(axis=0)

for mean, std in zip(means, stds):
    print("%0.4f (%0.4f)," % (mean, std), end="")
print("")

Migration Model,Best Alpha,CPC,CPC_d,MAE,R2,MAE,R2
dl,0.1674 (0.0326),0.3199 (0.1386),0.4038 (0.2141),14.2655 (16.5689),-0.5673 (0.9886),14.2655 (16.5689),-0.5673 (0.9886),


### All to all

In [83]:
# Five-fold cross-validation of the neural model over origin counties (all
# counties as destinations), repeated for every year.
# The label is set here so the cell does not depend on whatever `model` was
# left behind by an earlier cell.
model = "dl"
all_to_all_cv = []

kf = KFold(n_splits=5)
for train_index, test_index in kf.split(all_county_idxs):

    t_train_counties = all_county_idxs[train_index]
    t_test_counties = all_county_idxs[test_index]

    year_results = []
    for i, year in enumerate(years):

        # Load train
        args = get_full_dataset(i, t_train_counties, all_county_idxs, model=model)
        beta = args["beta"]
        x_train, y_train = get_pairs_from_full_dataset(args)

        mask = y_train > 0
        positive_indices = np.where(mask)[0]
        negative_indices = np.where(~mask)[0]

        num_positive = positive_indices.shape[0]
        # NOTE(review): with max() the zero-flow pairs are never reduced; they
        # are resampled with replacement to at least their original count.
        # If a cap at 40x positives was intended this would need min(); confirm.
        new_num_negative = max(negative_indices.shape[0], 40*num_positive)

        # NOTE(review): no random seed is set in the visible cells, so this
        # resampling differs between runs.
        negative_indices = np.random.choice(negative_indices, size=new_num_negative, replace=True)

        new_indices = np.concatenate([
            positive_indices,
            negative_indices
        ])

        x_train = x_train[new_indices]
        y_train = y_train[new_indices]

        # Fit the scaler on the training pairs only; reuse it for the test pairs.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)

        K.clear_session()
        early_stopping = EarlyStopping(monitor="cpcLoss", patience=100, restore_best_weights=True)
        t_model = keras.wrappers.scikit_learn.KerasRegressor(
            build_fn=baseline_model,
            epochs=500,
            batch_size=2**12,
            callbacks=[early_stopping],
            verbose=0
        )
        history = t_model.fit(x_train, y_train)

        # Test
        args = get_full_dataset(i, t_test_counties, all_county_idxs, model=model)
        x_test, y_test = get_pairs_from_full_dataset(args)
        x_test = scaler.transform(x_test)

        scores = evaluate_ml_models(t_model, x_test, y_test, beta, args)

        print("\t%s,%0.4f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f" % (model, beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))
        # The network has no alpha parameter. The original code stored a stale
        # `alpha` left over from the traditional-model cells (or failed with a
        # NameError on a fresh kernel). NaN keeps the 8-column row layout
        # without reporting a meaningless value.
        year_results.append((np.nan, beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))

    year_results = np.array(year_results)
    fold_mean = year_results.mean(axis=0)
    print("\t", fold_mean)
    all_to_all_cv.append(fold_mean)

KeyboardInterrupt: 

In [None]:
# Mean (std) across folds. Rows in all_to_all_cv carry eight values (an alpha
# slot, beta, then six evaluation scores), so the header lists Beta as well.
print("Migration Model,Best Alpha,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
print(model, end=",")
values = np.array(all_to_all_cv)
means = values.mean(axis=0)
stds = values.std(axis=0)

for mean, std in zip(means, stds):
    print("%0.4f (%0.4f)," % (mean, std), end="")
print("")

### Unflooded to unflooded

In [None]:
%%time
# Five-fold cross-validation of the neural model over the unflooded origin
# counties (unflooded counties as destinations), repeated for every year.
# The label is set here so the cell does not depend on whatever `model` was
# left behind by an earlier cell.
model = "dl"
unflooded_to_unflooded_cv = []

kf = KFold(n_splits=5)
for train_index, test_index in kf.split(unflooded_county_idxs):

    t_train_counties = unflooded_county_idxs[train_index]
    t_test_counties = unflooded_county_idxs[test_index]

    year_results = []
    for i, year in enumerate(years):
        # Load train
        args = get_full_dataset(i, t_train_counties, unflooded_county_idxs, model=model)
        beta = args["beta"]
        x_train, y_train = get_pairs_from_full_dataset(args)

        mask = y_train > 0
        positive_indices = np.where(mask)[0]
        negative_indices = np.where(~mask)[0]

        num_positive = positive_indices.shape[0]
        # NOTE(review): with max() the zero-flow pairs are never reduced; they
        # are resampled with replacement to at least their original count.
        # If a cap at 40x positives was intended this would need min(); confirm.
        new_num_negative = max(negative_indices.shape[0], 40*num_positive)

        # NOTE(review): no random seed is set in the visible cells, so this
        # resampling differs between runs.
        negative_indices = np.random.choice(negative_indices, size=new_num_negative, replace=True)

        new_indices = np.concatenate([
            positive_indices,
            negative_indices
        ])

        x_train = x_train[new_indices]
        y_train = y_train[new_indices]

        # Fit the scaler on the training pairs only; reuse it for the test pairs.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)

        K.clear_session()
        early_stopping = EarlyStopping(monitor="cpcLoss", patience=100, restore_best_weights=True)
        t_model = keras.wrappers.scikit_learn.KerasRegressor(
            build_fn=baseline_model,
            epochs=500,
            batch_size=2**12,
            callbacks=[early_stopping],
            verbose=0
        )
        history = t_model.fit(x_train, y_train)

        # Test
        args = get_full_dataset(i, t_test_counties, unflooded_county_idxs, model=model)
        x_test, y_test = get_pairs_from_full_dataset(args)
        x_test = scaler.transform(x_test)

        scores = evaluate_ml_models(t_model, x_test, y_test, beta, args)

        print("\t%s,%0.4f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f" % (model, beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))
        # The network has no alpha parameter. The original code stored a stale
        # `alpha` left over from the traditional-model cells (or failed with a
        # NameError on a fresh kernel). NaN keeps the 8-column row layout
        # without reporting a meaningless value.
        year_results.append((np.nan, beta, scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))

    year_results = np.array(year_results)
    fold_mean = year_results.mean(axis=0)
    print("\t", fold_mean)
    unflooded_to_unflooded_cv.append(fold_mean)

In [None]:
# Mean (std) across folds. Rows in unflooded_to_unflooded_cv carry eight values
# (an alpha slot, beta, then six evaluation scores), so the header lists Beta
# as well.
print("Migration Model,Best Alpha,Beta,CPC,CPC_d,MAE,R2,MAE,R2")
print(model, end=",")
values = np.array(unflooded_to_unflooded_cv)
means = values.mean(axis=0)
stds = values.std(axis=0)

for mean, std in zip(means, stds):
    print("%0.4f (%0.4f)," % (mean, std), end="")
print("")