# High Resolution Conflict Forecasting with Spatial Convolutions and Long Short-Term Memory

## Replication Archive

[Benjamin J. Radford](https://www.benradford.com)    
Assistant Professor  
UNC Charlotte  
bradfor7@uncc.edu  

This file replicates the following tables and figures:

* Table 4 (columns 1-2)

## Imports and seeds

In [1]:
#### import sys
import os
import gc
import logging

import pandas as pd
import geopandas as gpd
import numpy as np
from datetime import datetime

from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load

from itertools import product
from math import isnan

import views
from views import Period, Model, Downsampling
from views.utils.data import assign_into_df
from views.apps.transforms import lib as translib
from views.apps.evaluation import lib as evallib, feature_importance as fi
from views.apps.model import api
from views.apps.extras import extras

import keras
from keras.models import Model
from keras.layers import Input, ConvLSTM2D, Activation, Conv3D, BatchNormalization, Dropout, Bidirectional, GaussianNoise
from keras import optimizers

import tensorflow as tf

import random
import geoplot as gplt
import contextily as ctx

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.patheffects as pe

from numpy.random import seed
# Seed NumPy's global generator and TensorFlow's generator with one shared
# value so both start from the same state on every run.
RANDOM_SEED = 1234
seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

def _pgid_block(first_id, side=5, row_step=720):
    """Return ``side`` rows of ``side`` consecutive ids starting at *first_id*.

    Each successive row starts ``row_step`` below the previous one, which
    matches the pattern of the hand-listed ids this replaces.
    """
    return [
        first_id - row_step * row + col
        for row in range(side)
        for col in range(side)
    ]


# Two 5x5 groups of cell ids (149426..146550 and 114918..112042).
pgid_groupa = _pgid_block(149426)
pgid_groupb = _pgid_block(114918)

# Ask TensorFlow's native layer to log at level 3 only.
# NOTE(review): TensorFlow is imported above this line; confirm the setting
# still takes effect when it is made after the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

In [2]:
# Set this flag to True and run the cell to fetch the latest public data.
# The cells below fail unless the data has been imported at least once,
# either by this cell or by you beforehand.
redownload_data = False

if redownload_data:
    path_zip = views.apps.data.public.fetch_latest_zip_from_website(
        path_dir_destination=views.DIR_SCRATCH,
    )
    views.apps.data.public.import_tables_and_geoms(
        tables=views.TABLES,
        geometries=views.GEOMETRIES,
        path_zip=path_zip,
    )

dataset = views.DATASETS["pgm_africa_imp_0"]
df = dataset.gdf
# Turn the index levels into ordinary columns so they can be selected below.
df.reset_index(inplace=True)

# Keep only the geometry and the two key columns from the dataset; every other
# column comes from the CSV, joined on cell id and month id (inner join, the
# pandas default).
update = pd.read_csv("../data/pgm.csv")
merge_keys = ["pg_id", "month_id"]
df = pd.merge(df[["geom"] + merge_keys], update, on=merge_keys)

In [3]:
##
## Restrict the sample and build grid / time indices
##
# Keep years 1990-2020 and, within 2020, only months before September.
in_year_range = (df["year"] < 2021) & (df["year"] > 1989)
before_cutoff = (df["year"] < 2020) | (df["month"] < 9)
df = df.loc[in_year_range & before_cutoff]

# Centroid coordinates of every cell geometry.
df["coordx"] = df["geom"].apply(lambda x: x.centroid.x)
df["coordy"] = df["geom"].apply(lambda y: y.centroid.y)

# Zero-based integer indices. Coordinates are doubled before truncation, so
# one index step corresponds to half a coordinate unit.
df["col_idx"] = ((df["coordx"] - df["coordx"].min()) * 2).astype("int64")
df["row_idx"] = ((df["coordy"] - df["coordy"].min()) * 2).astype("int64")
df["year_idx"] = (df["year"] - df["year"].min()).astype("int64")
df["month_idx"] = (df["month"] - df["month"].min()).astype("int64")

# The first month_id in the sample defines year_month_idx == 0. It is reused
# for the placeholder rows appended at the end of this cell so both sets of
# rows share one origin.
first_month_id = int(df["month_id"].min())
df["year_month_idx"] = (df["month_id"] - first_month_id).astype("int64")

# "geom" is kept on purpose: it is needed for the placeholder rows below.


##
## Make Lags
##
LAG_STEPS = range(1, 8)
lag_keys = ["year_month_idx", "pg_id"]

# Snapshot taken before any lag column is merged in, so every lag is built
# from the same unmodified values.
lag_source = df[lag_keys + ["ln_ged_best_sb"]]

for lag in LAG_STEPS:
    # Shifting the time index forward by `lag` makes the left join attach to
    # each row the value observed `lag` months earlier in the same cell.
    lagged = lag_source.rename(
        columns={"ln_ged_best_sb": f"ln_ged_best_sb_l{lag}"}
    ).assign(year_month_idx=lag_source["year_month_idx"] + lag)
    df = pd.merge(df, lagged, how="left", on=lag_keys)

# delta_k = current value minus the value k months earlier.
for lag in LAG_STEPS:
    df[f"delta_{lag}"] = df["ln_ged_best_sb"] - df[f"ln_ged_best_sb_l{lag}"]

del lag_source
del lagged

gc.collect()

##
## Append placeholder rows for the months after the sample
##
# One row per grid cell for each of month_id 489..495; only the cell
# identifiers, geometry and time indices are filled, every other column is
# left missing by the concat.
# NOTE(review): these are presumably the seven forecast months that follow
# the August 2020 cut-off above - confirm against the forecast files.
FORECAST_MONTH_IDS = range(489, 496)

grid_cells = df[["pg_id","col_idx","row_idx","geom"]].drop_duplicates()
# Tag one copy of the cell table per month instead of hard-coding the number
# of cells, so this keeps working if the grid size changes.
pg_col_row = pd.concat(
    [grid_cells.assign(month_id=month_id) for month_id in FORECAST_MONTH_IDS]
)
pg_col_row["year_month_idx"] = pg_col_row["month_id"] - first_month_id

print(pg_col_row.shape)
df = pd.concat([df,pg_col_row])
print(df.shape)

# The concatenated frame has repeated index labels; move them into a column
# and replace them with a fresh 0..n-1 index.
df.reset_index(inplace=True)

(74739, 6)
(4003875, 289)


In [4]:
# Read the three forecast files.
set1 = pd.read_csv(
    "../data/expanded_features/forecasts/updated_ViEWSpred_competition_radford_set1.csv"
)
set2 = pd.read_csv(
    "../data/expanded_features/forecasts/updated_ViEWSpred_competition_radford_set2.csv"
)
set3 = pd.read_csv(
    "../data/expanded_features/forecasts/updated_ViEWSpred_competition_radford_set3.csv"
)

# Stack them in the order set3, set2, set1.
forecasts = pd.concat([set3, set2, set1])

# Outer join on cell id and month id: rows present on only one side are kept,
# with the other side's columns left missing.
forecast_keys = ["pg_id", "month_id"]
df = pd.merge(df, forecasts, how="outer", on=forecast_keys)

In [5]:
def get_row_metrics(actual,pred,rowname):
    """Format one LaTeX table row holding the MSE and TADDA of a forecast.

    Args:
        actual: Observed values.
        pred: Predicted values, subtracted element-wise from ``actual``.
        rowname: Text placed in the first cell of the row.

    Returns:
        The string ``"<rowname> & <mse> & <tadda> \\\\"`` with both numbers
        printed to six decimal places; the trailing two backslashes are the
        LaTeX row terminator.
    """
    residual = actual - pred
    # Mean squared error.
    mse = np.mean(np.power(residual, 2))
    # TADDA exactly as computed by the evaluation library.
    tadda = evallib.tadda_score(actual, pred)
    return f"{rowname} & {mse:.6f} & {tadda:.6f} \\\\"

# Print the LaTeX body of the metrics table: one row per forecast step with
# the MSE and TADDA between the observed change (delta_<s>) and the forecast
# (Radford_clstm_s<s>).
#
# Rows whose year lies strictly between the two bounds are evaluated, i.e.
# 2017-2019. An earlier, disabled copy of this table used 2014-2016; set the
# bounds to (2013, 2017) to reproduce it.
eval_year_after = 2016
eval_year_before = 2020

# Steps 2..7 only: the s=1 row was already disabled in the original code.
# NOTE(review): presumably there is no Radford_clstm_s1 forecast - confirm.
eval_steps = range(2, 8)

# Select the evaluation rows once instead of re-filtering for every column.
eval_rows = df.loc[(df["year"] > eval_year_after) & (df["year"] < eval_year_before)]

# Backslashes are doubled so "\hline" is emitted literally rather than relying
# on the invalid "\h" escape sequence; the printed text is unchanged.
print("\\hline \n \\\\ Steps & MSE & TADDA \\\\ \n \\hline \\\\")
for step in eval_steps:
    print(get_row_metrics(eval_rows[f"delta_{step}"],
                          eval_rows[f"Radford_clstm_s{step}"],
                          f"$s={step}$"))
print("\\hline")

\hline 
 \\ Steps & MSE & TADDA \\ 
 \hline \\
$s=2$ & 0.022364 & 0.015740 \\
$s=3$ & 0.022369 & 0.016105 \\
$s=4$ & 0.022433 & 0.015887 \\
$s=5$ & 0.022557 & 0.015746 \\
$s=6$ & 0.022649 & 0.015840 \\
$s=7$ & 0.022624 & 0.015980 \\
\hline
