In [2]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
from collections import defaultdict

In [3]:
# Cities whose model results are read by the cells below.
cities = ['bogota', 'boston', 'LA', 'chicago']

# Feature-set flags, one dict per fitted model; each dict is unpacked as
# keyword arguments into create_model_string(**flags).
models = [
    {'core': True},
    {'sd': True},
    {'uf': True},
    {'m': True},
    {'sd': True, 'uf': True},
    {'sd': True, 'm': True},
    {'uf': True, 'm': True},
    {'full': True},
]

# Forwarded to load_results(), which accepts it but does not read it.
neigh = 'neigh'

In [4]:
def create_model_string(full=True, sd=False, uf=False, m=False, core=False):
    """Build the short model identifier used in result file names.

    Parameters
    ----------
    full : bool
        Accepted for interface compatibility; its value does not influence
        the result ('full' is returned whenever no other flag is set).
    sd, uf, m : bool
        Feature-group flags. Each enabled flag contributes its own tag, in
        the fixed order 'sd', 'uf', 'm'.
    core : bool
        When truthy the result is 'core', regardless of the other flags.

    Returns
    -------
    str
        'core', 'full', or the enabled tags joined with '_' (e.g. 'sd_uf').
    """
    # 'core' takes precedence over every feature-group flag.
    if core:
        return 'core'

    enabled_tags = [tag for tag, is_on in (('sd', sd), ('uf', uf), ('m', m)) if is_on]

    # No individual feature group selected -> the full model.
    return '_'.join(enabled_tags) if enabled_tags else 'full'

def load_results(cities, models, neigh, model_type, target='', od='', load_loo=False, niterations=15000):
    """Read the per-city, per-model result CSVs and stack them into one frame.

    Parameters
    ----------
    cities : iterable of str
        City names used in the file names.
    models : iterable of dict
        Flag dicts; each one is unpacked into create_model_string(**flags).
    neigh, target :
        Accepted but not read by this function.
    model_type : str
        Prefix of the result file name (first component after 'pystan_').
    od : str
        Optional token inserted right before the iteration count in the
        file name.
    load_loo : bool
        When true, the 'LOO' rows of the main file are replaced by the 'LOO'
        rows of the matching 'pystanCV_' file.
    niterations : int
        Iteration count embedded in the file name.

    Returns
    -------
    pandas.DataFrame
        All loaded files concatenated (original row indices kept), with
        'city' and 'model' columns added and the 'R1' value of 'LOO' rows
        rounded with numpy.around.

    Notes
    -----
    Combinations of city 'chicago' with a model dict containing the key 'm'
    or 'full' are skipped. If nothing is loaded, pandas.concat raises.
    """
    loaded_frames = []
    for city in cities:
        for flags in models:
            # No files are read for Chicago models that carry the 'm'/'full' flag.
            if ('m' in flags or 'full' in flags) and city == 'chicago':
                continue

            model_string = create_model_string(**flags)
            name_parts = dict(model_type=model_type, od=od, city=city, model_name=model_string, niterations=niterations)

            results_csv = '{model_type}_{city}_ego_{model_name}_{od}{niterations}_ncrimes.csv'.format(**name_parts)
            df_model = pd.read_csv('../data/generated_files/model_results/pystan_{}'.format(results_csv), index_col=None, header=0)

            if load_loo:
                # Swap the in-file LOO rows for the ones from the CV run.
                cv_csv = '{model_type}_CV_{city}_ego_{model_name}_{od}{niterations}_ncrimes.csv'.format(**name_parts)
                df_cv = pd.read_csv('../data/generated_files/model_results/pystanCV_{}'.format(cv_csv), index_col=None, header=0)
                loo_rows = df_cv[df_cv.Metric == 'LOO']
                df_model = pd.concat((df_model[df_model.Metric != 'LOO'], loo_rows))

            df_model['city'] = city
            df_model['model'] = model_string

            # LOO values are rounded to whole numbers.
            is_loo = df_model.Metric == 'LOO'
            df_model.loc[is_loo, 'R1'] = np.around(df_model.loc[is_loo, 'R1'])

            loaded_frames.append(df_model)

    return pd.concat(loaded_frames)

In [5]:
# Short identifier of every configured model, in the order of `models`.
model_strings = [create_model_string(**flags) for flags in models]

# Labels printed in the LaTeX tables, keyed by the short model identifier.
model_nice_names = dict([
    ('core', 'Core'),
    ('sd', 'Social-disorganization (SD)'),
    ('uf', 'Built environment (BE)'),
    ('m', 'Mobility (M)'),
    ('sd_uf', 'SD+BE'),
    ('sd_m', 'SD+M'),
    ('uf_m', 'BE+M'),
    ('full', 'SD+BE+M (Full)'),
])

model_strings

['core', 'sd', 'uf', 'm', 'sd_uf', 'sd_m', 'uf_m', 'full']

In [56]:
# Prints one LaTeX table row per model: for every city the marginal R2,
# the conditional R2 (in parentheses) and the LOO value. The best value per
# city is wrapped in \bst{} for 'R2 marginal' and 'LOO'.
model_type = 'BSF'
cities_to_compare = ['bogota', 'boston', 'LA', 'chicago']
metrics_to_show = ['R2 marginal', 'R2 conditional', 'LOO']
# Placeholder metrics for (city, model) pairs that have no results. NaN is
# used so the placeholder can never be ranked as best and can be rendered as
# '-' without string-replacing numeric sentinels in real values.
default_for_chicago = [np.nan] * len(metrics_to_show)

# Read every result file once. load_results() already skips the Chicago
# mobility/full models, so the frame does not depend on the model being
# processed and does not need to be re-read inside the loop.
m1_df = load_results(cities, models, neigh, model_type=model_type)
m1_df['model'] = pd.Categorical(m1_df['model'].values, model_strings)
m1_df['city'] = pd.Categorical(m1_df['city'], cities)
m1_df['R1'] = m1_df['R1'].astype('float32')

model_results = defaultdict(list)
for m in model_strings:
    # Test name components instead of substrings so that a model name that
    # merely contains the letter 'm' is not mistaken for a mobility model.
    uses_mobility = m == 'full' or 'm' in m.split('_')

    for c in cities_to_compare:
        if c == 'chicago' and uses_mobility:
            model_results[c].append(default_for_chicago)
            continue

        selected = m1_df[(m1_df.city == c) & (m1_df.model == m)]
        model_results[c].append(
            [selected.loc[selected.Metric == k, 'R1'].values[0] for k in metrics_to_show]
        )


# Format every value and highlight the best one per city and metric.
results_string = {}
for c in cities_to_compare:
    model_results[c] = np.array(model_results[c])
    n_models = model_results[c].shape[0]
    # Plain lists of str: a fixed-width numpy string array could silently
    # truncate the longer highlighted cells.
    results_string[c] = [[''] * len(metrics_to_show) for _ in range(n_models)]

    for i, k in enumerate(metrics_to_show):
        column = model_results[c][:, i]
        number_format = '{:0.2f}' if k in {'R2 conditional', 'R2 marginal'} else '{:0.0f}'

        for j, value in enumerate(column):
            results_string[c][j][i] = '-' if np.isnan(value) else number_format.format(value)

        # The conditional R2 is never highlighted.
        if k != 'R2 conditional' and not np.all(np.isnan(column)):
            bestjs = np.argwhere(column == np.nanmax(column)).flatten()
            for b in bestjs:
                results_string[c][b][i] = '$\\bst{{{}}}$'.format(results_string[c][b][i])


for i, m in enumerate(model_strings):
    final_string = []
    for c in cities_to_compare:
        final_string.extend(results_string[c][i])

    print('{} & {} ({}) & {} && {} ({}) & {} && {} ({}) & {} && {} ({}) & {} \\\\'.format(*([model_nice_names[m]] + final_string)))


Core & 0.54 (0.75) & -3897 && 0.21 (0.64) & -2035 && 0.18 (0.68) & -9665 && 0.09 (0.68) & -8415 \\
Social-disorganization (SD) & 0.57 (0.75) & -3891 && 0.55 (0.68) & -2019 && 0.53 (0.72) & -9529 && 0.66 (0.78) & -8019 \\
Built environment (BE) & 0.61 (0.76) & -3881 && 0.36 (0.68) & -2014 && 0.27 (0.69) & -9629 && 0.21 (0.69) & -8371 \\
Mobility (M) & 0.64 (0.80) & -3804 && 0.42 (0.70) & -2001 && 0.25 (0.70) & -9570 && - (-) & - \\
SD+BE & 0.64 (0.76) & -3881 && 0.65 (0.72) & -1987 && 0.56 (0.72) & -9508 && $\bst{0.67}$ (0.79) & $\bst{-8003}$ \\
SD+M & 0.66 (0.81) & $\bst{-3795}$ && 0.67 (0.73) & -1973 && 0.55 (0.73) & -9467 && - (-) & - \\
BE+M & 0.68 (0.80) & -3819 && 0.50 (0.72) & -1989 && 0.30 (0.70) & -9585 && - (-) & - \\
SD+BE+M (Full) & $\bst{0.70}$ (0.80) & -3808 && $\bst{0.70}$ (0.75) & $\bst{-1957}$ && $\bst{0.56}$ (0.74) & $\bst{-9454}$ && - (-) & - \\


## The need for spatial models

In [62]:
# Prints, per city and feature set, one LaTeX row comparing the model
# families in `models_to_compare` on the metrics in `metrics_to_show`.
models_to_compare = ['BSF', 'nb']
metrics_to_show = ['LOO', 'R2 conditional', 'MC_p']

# Per metric: how the highlighted value is chosen and how values are printed.
# LOO and conditional R2 highlight the maximum, MC_p highlights the minimum.
metric_rules = {
    'LOO': (np.amax, '{:0.0f}'),
    'R2 conditional': (np.amax, '{:0.2f}'),
    'MC_p': (np.amin, '{:0.3f}'),
}

# Read the result files once per model family instead of once per
# (city, feature set, family) combination; the loaded frame is the same
# for every iteration.
results_by_type = {}
for model_type in models_to_compare:
    m1_df = load_results(cities, models, neigh, model_type=model_type)
    m1_df['model'] = pd.Categorical(m1_df['model'].values, model_strings)
    m1_df['city'] = pd.Categorical(m1_df['city'], cities)
    results_by_type[model_type] = m1_df

for c in cities:
    for i, m in enumerate(model_strings):
        # Chicago has no results for the mobility/full feature sets. Compare
        # name components rather than substrings of the model name.
        if c == 'chicago' and (m == 'full' or 'm' in m.split('_')):
            continue

        model_results = defaultdict(list)
        for model_type in models_to_compare:
            m1_df = results_by_type[model_type]
            row = m1_df[(m1_df.city == c) & (m1_df.model == m)].copy()
            row['R1'] = row['R1'].astype('float32')

            for k in metrics_to_show:
                model_results[k].append(row[row.Metric == k]['R1'].values[0])

        # The city label is only printed on the first row of each city.
        city_column = ''
        if i == 0:
            city_column = '\\multirow{{1}}{{*}}{{{cityname}}} '.format(cityname=c.title() if c != 'LA' else c)

        # A list of formatted strings for each metric (one entry per family).
        results_string = {}
        for k in metrics_to_show:
            pick_best, number_format = metric_rules.get(k, (np.amin, '{}'))
            values = np.asarray(model_results[k])
            results_string[k] = [number_format.format(x) for x in values]
            for b in np.argwhere(values == pick_best(values)).flatten():
                results_string[k][b] = '\\textbf{{{}}}'.format(results_string[k][b])

        final_string = [city_column, model_nice_names[m]]
        # `j` (not `i`) so the enumerate index of the model loop is not shadowed.
        for j in range(len(models_to_compare)):
            for k in metrics_to_show:
                final_string.append(results_string[k][j])

        print('{}& {} & {} & {} & {} && {} & {} & {} \\\\'.format(*final_string))
    print('\\midrule')

\multirow{1}{*}{Bogota} & Core & \textbf{-3897} & \textbf{0.75} & \textbf{-0.034} && -4126 & 0.53 & 0.455 \\
& Social-disorganization (SD) & \textbf{-3891} & \textbf{0.75} & \textbf{-0.043} && -4079 & 0.58 & 0.354 \\
& Built environment (BE) & \textbf{-3881} & \textbf{0.76} & \textbf{-0.036} && -4061 & 0.61 & 0.371 \\
& Mobility (M) & \textbf{-3804} & \textbf{0.80} & \textbf{-0.042} && -4034 & 0.64 & 0.460 \\
& SD+BE & \textbf{-3880} & \textbf{0.76} & \textbf{-0.035} && -4013 & 0.65 & 0.287 \\
& SD+M & \textbf{-3795} & \textbf{0.81} & \textbf{-0.050} && -3988 & 0.67 & 0.374 \\
& BE+M & \textbf{-3819} & \textbf{0.80} & \textbf{-0.025} && -3980 & 0.68 & 0.361 \\
& SD+BE+M (Full) & \textbf{-3809} & \textbf{0.80} & \textbf{-0.040} && -3941 & 0.71 & 0.284 \\
\midrule
\multirow{1}{*}{Boston} & Core & \textbf{-2035} & \textbf{0.64} & \textbf{-0.005} && -2209 & 0.22 & 0.418 \\
& Social-disorganization (SD) & \textbf{-2019} & \textbf{0.68} & \textbf{-0.003} && -2088 & 0.55 & 0.236 \\
& Built en

## BSF vs MSF vs RSR

In [68]:
# Prints, per city and feature set, one LaTeX row comparing the model
# families in `models_to_compare` on LOO and MC_p. Only the best (maximum)
# LOO value is set in bold; MC_p is printed without highlighting.
models_to_compare = ['BSF', 'REESF-a', 'ESF']
metrics_to_show = ['LOO', 'MC_p']

# Read the result files once per model family instead of once per
# (city, feature set, family) combination; the loaded frame is the same
# for every iteration.
results_by_type = {}
for model_type in models_to_compare:
    m1_df = load_results(cities, models, neigh, model_type=model_type)
    m1_df['model'] = pd.Categorical(m1_df['model'].values, model_strings)
    m1_df['city'] = pd.Categorical(m1_df['city'], cities)
    results_by_type[model_type] = m1_df

for c in cities:
    for i, m in enumerate(model_strings):
        # Chicago has no results for the mobility/full feature sets. Compare
        # name components rather than substrings of the model name.
        if c == 'chicago' and (m == 'full' or 'm' in m.split('_')):
            continue

        model_results = defaultdict(list)
        for model_type in models_to_compare:
            m1_df = results_by_type[model_type]
            row = m1_df[(m1_df.city == c) & (m1_df.model == m)].copy()
            row['R1'] = row['R1'].astype('float32')
            is_loo = row.Metric == 'LOO'
            row.loc[is_loo, 'R1'] = row.loc[is_loo, 'R1'].astype('int')

            for k in metrics_to_show:
                model_results[k].append(row[row.Metric == k]['R1'].values[0])

        # The city label is only printed on the first row of each city.
        city_column = ''
        if i == 0:
            city_column = '\\multirow{{1}}{{*}}{{{cityname}}} '.format(cityname=c.title() if c != 'LA' else c)

        # A list of formatted strings for each metric (one entry per family).
        results_string = {}
        for k in metrics_to_show:
            values = np.asarray(model_results[k])
            if k == 'MC_p':
                results_string[k] = ['{:0.3f}'.format(x) for x in values]
            else:
                results_string[k] = ['{:0.0f}'.format(x) for x in values]
                for b in np.argwhere(values == np.amax(values)).flatten():
                    results_string[k][b] = '\\textbf{{{}}}'.format(results_string[k][b])

        final_string = [city_column, model_nice_names[m]]
        # `j` (not `i`) so the enumerate index of the model loop is not shadowed.
        for j in range(len(models_to_compare)):
            for k in metrics_to_show:
                final_string.append(results_string[k][j])

        print('{}& {} & {} & {} && {} & {} && {} & {}\\\\'.format(*final_string))
    print('\\midrule')

\multirow{1}{*}{Bogota} & Core & \textbf{-3897} & -0.034 && -3899 & -0.045 && -3902 & -0.041\\
& Social-disorganization (SD) & \textbf{-3891} & -0.043 && -3895 & -0.052 && -3896 & -0.049\\
& Built environment (BE) & \textbf{-3881} & -0.036 && -3882 & -0.045 && -3884 & -0.042\\
& Mobility (M) & -3804 & -0.042 && \textbf{-3803} & -0.048 && -3807 & -0.046\\
& SD+BE & \textbf{-3880} & -0.035 && -3882 & -0.043 && -3884 & -0.040\\
& SD+M & \textbf{-3795} & -0.050 && -3796 & -0.057 && -3798 & -0.056\\
& BE+M & -3819 & -0.025 && \textbf{-3817} & -0.033 && -3822 & -0.032\\
& SD+BE+M (Full) & \textbf{-3809} & -0.040 && -3810 & -0.049 && -3810 & -0.046\\
\midrule
\multirow{1}{*}{Boston} & Core & \textbf{-2035} & -0.005 && \textbf{-2035} & -0.016 && \textbf{-2035} & -0.014\\
& Social-disorganization (SD) & \textbf{-2019} & -0.003 && -2020 & -0.017 && -2020 & -0.016\\
& Built environment (BE) & -2014 & -0.033 && \textbf{-2013} & -0.044 && -2014 & -0.044\\
& Mobility (M) & -2001 & -0.026 && \textbf{

## OD-d vs OD-m vs binary

In [60]:
# Prints, per city and feature set, one LaTeX row comparing BSF results
# stored under different `od` file-name tokens ('' = no token) on LOO and
# MC_p. Only the best (maximum) LOO value is set in bold.
models_to_compare = ['', 'ODd_', 'ODm_']
metrics_to_show = ['LOO', 'MC_p']

for c in cities:
    # Read the files of this city once per `od` token instead of once per
    # (feature set, token) combination; the loaded frame does not depend on
    # the feature set being printed.
    results_by_od = {}
    for od_token in models_to_compare:
        # Fix for chicago: its 'ODm_' results are not loaded.
        if od_token == 'ODm_' and c == 'chicago':
            continue
        m1_df = load_results([c], models, neigh, model_type='BSF', od=od_token, niterations=20000 if od_token else 15000)
        m1_df['model'] = pd.Categorical(m1_df['model'].values, model_strings)
        m1_df['city'] = pd.Categorical(m1_df['city'], cities)
        results_by_od[od_token] = m1_df

    for i, m in enumerate(model_strings):
        # Chicago has no results for the mobility/full feature sets. Compare
        # name components rather than substrings of the model name.
        if c == 'chicago' and (m == 'full' or 'm' in m.split('_')):
            continue

        model_results = defaultdict(list)
        for od_token in models_to_compare:
            if od_token not in results_by_od:
                # Placeholder for the combination skipped above; it is
                # printed as-is and is low enough never to be the best LOO.
                for k in metrics_to_show:
                    model_results[k].append(-1000000)
                continue

            m1_df = results_by_od[od_token]
            row = m1_df[(m1_df.city == c) & (m1_df.model == m)].copy()
            row['R1'] = row['R1'].astype('float32')

            for k in metrics_to_show:
                model_results[k].append(row[row.Metric == k]['R1'].values[0])

        # The city label is only printed on the first row of each city.
        city_column = ''
        if i == 0:
            city_column = '\\multirow{{1}}{{*}}{{{cityname}}} '.format(cityname=c.title() if c != 'LA' else c)

        # A list of formatted strings for each metric (one entry per token).
        results_string = {}
        for k in metrics_to_show:
            values = np.asarray(model_results[k])
            if k == 'MC_p':
                results_string[k] = ['{:0.3f}'.format(x) for x in values]
            else:
                results_string[k] = ['{:0.0f}'.format(x) for x in values]
                for b in np.argwhere(values == np.amax(values)).flatten():
                    results_string[k][b] = '\\textbf{{{}}}'.format(results_string[k][b])

        final_string = [city_column, model_nice_names[m]]
        # `j` (not `i`) so the enumerate index of the model loop is not shadowed.
        for j in range(len(models_to_compare)):
            for k in metrics_to_show:
                final_string.append(results_string[k][j])

        print('{}& {} & {} & {} && {} & {} && {} & {}\\\\'.format(*final_string))
    print('\\midrule')

\multirow{1}{*}{Bogota} & Core & \textbf{-3897} & -0.034 && -3999 & 0.016 && -4104 & 0.073\\
& Social-disorganization (SD) & \textbf{-3891} & -0.043 && -3969 & 0.010 && -4051 & 0.060\\
& Built environment (BE) & \textbf{-3881} & -0.036 && -3971 & 0.003 && -4051 & 0.048\\
& Mobility (M) & \textbf{-3804} & -0.042 && -3906 & 0.010 && -4021 & 0.037\\
& SD+BE & \textbf{-3880} & -0.035 && -3949 & 0.006 && -4000 & 0.042\\
& SD+M & \textbf{-3795} & -0.050 && -3879 & 0.002 && -3964 & 0.031\\
& BE+M & \textbf{-3819} & -0.025 && -3889 & 0.003 && -3975 & 0.027\\
& SD+BE+M (Full) & \textbf{-3809} & -0.040 && -3873 & 0.001 && -3929 & 0.027\\
\midrule
\multirow{1}{*}{Boston} & Core & \textbf{-2035} & -0.005 && -2078 & 0.036 && -2208 & 0.118\\
& Social-disorganization (SD) & \textbf{-2019} & -0.003 && -2037 & 0.024 && -2081 & 0.107\\
& Built environment (BE) & \textbf{-2014} & -0.033 && -2040 & 0.010 && -2168 & 0.076\\
& Mobility (M) & \textbf{-2001} & -0.026 && -2014 & -0.010 && -2094 & 0.029\\
& SD+