# High Resolution Conflict Forecasting with Spatial Convolutions and Long Short-Term Memory

## Replication Archive

[Benjamin J. Radford](https://www.benradford.com)    
Assistant Professor  
UNC Charlotte  
bradfor7@uncc.edu  

This file replicates the following tables and figures:

* Table 5

## Imports and seeds

In [1]:
import sys
import os
import gc
import logging

import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load

from itertools import product
from math import isnan

import views
from views import Period, Model, Downsampling
from views.utils.data import assign_into_df
from views.apps.transforms import lib as translib
from views.apps.evaluation import lib as evallib, feature_importance as fi
from views.apps.model import api
from views.apps.extras import extras

import keras
from keras.models import Model
from keras.layers import Input, ConvLSTM2D, Activation, Conv3D, BatchNormalization, Dropout, Bidirectional, GaussianNoise
from keras import optimizers

import tensorflow as tf

import random
import geoplot as gplt
import contextily as ctx

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
from mpl_toolkits.axes_grid1 import make_axes_locatable

from numpy.random import seed
# Seed NumPy's and TensorFlow's global random generators with the same value.
RANDOM_SEED = 1234
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Set TensorFlow's C++ minimum log level environment variable.
# NOTE(review): TensorFlow has already been imported by this point; confirm
# the variable still takes effect when it is set this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Each group is a 5x5 block of grid-cell ids (matched against the `pg_id`
# column further down): five consecutive ids per row, and every row starts
# 720 below the row before it.
_BLOCK_SIDE = 5
_ROW_STRIDE = 720

pgid_groupa = [
    149426 - _ROW_STRIDE * row + col
    for row in range(_BLOCK_SIDE)
    for col in range(_BLOCK_SIDE)
]

pgid_groupb = [
    114918 - _ROW_STRIDE * row + col
    for row in range(_BLOCK_SIDE)
    for col in range(_BLOCK_SIDE)
]

# Feature names; each one appears as the suffix of the `_drop_in_` /
# `_drop_out_` prediction columns and becomes one row of Table 5.
cols_feats = [
    "ln_ged_best_sb",
    "pgd_bdist3",
    "pgd_capdist",
    "pgd_agri_ih",
    "pgd_pop_gpw_sum",
    "pgd_ttime_mean",
    "spdist_pgd_diamsec",
    "pgd_pasture_ih",
    "pgd_savanna_ih",
    "pgd_forest_ih",
    "pgd_urban_ih",
    "pgd_barren_ih",
    "pgd_gcp_mer",
]



## Load the data

Caution: the .csv file is approximately 17 GB.

In [2]:
# Load every prediction column into memory in one go; the file is large
# (the notebook text puts it at roughly 17 GB).
predictions_csv = "../data/competition_model/feature_dropout/bjr_all_preds_drop.csv"
out_df = pd.read_csv(filepath_or_buffer=predictions_csv)

## Table 5: Pearson's correlation between baseline predictions and feature-dropout predictions

In [3]:
from sklearn.metrics import r2_score

# Table 5: prints the body of a LaTeX table with one row per entry of
# `cols_feats`. Every cell is the Pearson correlation between the baseline
# prediction columns (`Radford_clstm_s<step>`) and the matching
# `Radford_clstm_s<step>_drop_in_<feature>` ("Within") or
# `Radford_clstm_s<step>_drop_out_<feature>` ("Outside") columns, pooled over
# all steps, for the rows of one grid-cell group (SetA / SetB).

# Months and steps pooled into every correlation.
TABLE5_MONTH_IDS = [490, 491, 492, 493, 494, 495]
TABLE5_STEPS = range(2, 8)


def _pooled_correlation(cells, drop_kind, feature):
    """Return one table cell: the pooled Pearson correlation, rounded to 3 places.

    Args:
        cells: DataFrame already restricted to the rows of interest; it must
            contain the baseline and comparison prediction columns.
        drop_kind: ``"drop_in"`` or ``"drop_out"``; the infix of the
            comparison column name.
        feature: Feature name that ends the comparison column name.

    Returns:
        The correlation between baseline and comparison predictions. Rows
        with a missing value in either column are dropped per step, and the
        remaining value pairs of all steps are concatenated before the
        correlation is computed.

    Raises:
        KeyError: If an expected prediction column is absent from ``cells``.
    """
    baseline = []
    comparison = []
    for step in TABLE5_STEPS:
        base_col = f"Radford_clstm_s{step}"
        drop_col = f"{base_col}_{drop_kind}_{feature}"
        pairs = cells[[base_col, drop_col]].dropna()
        baseline.extend(pairs[base_col].tolist())
        comparison.extend(pairs[drop_col].tolist())
    # Off-diagonal entry of the 2x2 correlation matrix is Pearson's r.
    return round(np.corrcoef(baseline, comparison)[0, 1], 3)


# Select each group's rows once. The selection does not depend on the feature
# or the step, so there is no need to rebuild the boolean masks over the whole
# frame inside the loops.
_in_months = out_df["month_id"].isin(TABLE5_MONTH_IDS)
cells_a = out_df.loc[out_df["pg_id"].isin(pgid_groupa) & _in_months]
cells_b = out_df.loc[out_df["pg_id"].isin(pgid_groupb) & _in_months]

# Every backslash is escaped explicitly: "\m" and "\e" are not valid escape
# sequences in a regular string literal.
print("\\hline\\hline\\\\ \n & \\multicolumn{2}{c}{\\emph{SetA}} & \\multicolumn{2}{c}{\\emph{SetB}} \\\\")
print(" Feature & Within & Outside & Within & Outside \\\\ \\hline")
for feature in cols_feats:
    r_in_a = _pooled_correlation(cells_a, "drop_in", feature)
    r_out_a = _pooled_correlation(cells_a, "drop_out", feature)
    r_in_b = _pooled_correlation(cells_b, "drop_in", feature)
    r_out_b = _pooled_correlation(cells_b, "drop_out", feature)

    # Underscores must be escaped to be printable in LaTeX text mode.
    varname = feature.replace("_", "\\_")
    print(f"{varname} & {r_in_a:.3f} & {r_out_a:.3f} & {r_in_b:.3f} & {r_out_b:.3f} \\\\")


\hline\hline\\ 
 & \multicolumn{2}{c}{\emph{SetA}} & \multicolumn{2}{c}{\emph{SetB}} \\
 Feature & Within & Outside & Within & Outside \\ \hline
ln\_ged\_best\_sb & -0.157 & 0.862 & 0.036 & 0.978 \\
pgd\_bdist3 & 0.810 & 0.809 & 0.975 & 0.972 \\
pgd\_capdist & 0.805 & 0.792 & 0.967 & 0.979 \\
pgd\_agri\_ih & 0.811 & 0.801 & 0.974 & 0.968 \\
pgd\_pop\_gpw\_sum & 0.802 & 0.807 & 0.973 & 0.978 \\
pgd\_ttime\_mean & 0.804 & 0.842 & 0.974 & 0.974 \\
spdist\_pgd\_diamsec & 0.807 & 0.798 & 0.969 & 0.974 \\
pgd\_pasture\_ih & 0.803 & 0.813 & 0.973 & 0.976 \\
pgd\_savanna\_ih & 0.808 & 0.824 & 0.969 & 0.977 \\
pgd\_forest\_ih & 0.805 & 0.805 & 0.973 & 0.972 \\
pgd\_urban\_ih & 0.803 & 0.819 & 0.973 & 0.977 \\
pgd\_barren\_ih & 0.797 & 0.823 & 0.973 & 0.971 \\
pgd\_gcp\_mer & 0.804 & 0.803 & 0.974 & 0.976 \\
