# High Resolution Conflict Forecasting with Spatial Convolutions and Long Short-Term Memory

## Replication Archive

[Benjamin J. Radford](https://www.benradford.com)    
Assistant Professor  
UNC Charlotte  
bradfor7@uncc.edu  

This file produces all necessary data for the feature dropout study. 

**Warning:** This file may take several days to run depending on your computer's speed.

## Imports and seeds

In [None]:
import sys
import os
import gc
import logging

import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load

from itertools import product
from math import isnan

import views
from views import Period, Model, Downsampling
from views.utils.data import assign_into_df
from views.apps.transforms import lib as translib
from views.apps.evaluation import lib as evallib, feature_importance as fi
from views.apps.model import api
from views.apps.extras import extras

import keras
from keras.models import Model
from keras.layers import Input, ConvLSTM2D, Activation, Conv3D, BatchNormalization, Dropout, Bidirectional, GaussianNoise
from keras import optimizers

import tensorflow as tf

import random
import geoplot as gplt
import contextily as ctx

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
from mpl_toolkits.axes_grid1 import make_axes_locatable

from numpy.random import seed
# Seed NumPy's global random number generator and TensorFlow's global
# generator with the same fixed value.
seed(1234)
tf.random.set_seed(1234)

# Presumably silences TensorFlow's native (C++) log output.
# NOTE(review): this is assigned after `import tensorflow` has already run;
# confirm the setting still takes effect when applied this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Two study regions, each a 5x5 window of grid-cell ids (pg_id). A window is
# listed row by row: five consecutive ids per row, and every following row
# starts 720 ids below the previous one.
pgid_groupa = [149426 - 720 * r + c for r in range(5) for c in range(5)]

pgid_groupb = [114918 - 720 * r + c for r in range(5) for c in range(5)]

# Feature columns, in the channel order used when the data cube is built.
# The first entry is also the column the cells below slice out as `Y`.
cols_feats = [
    "ln_ged_best_sb",
    "pgd_bdist3", "pgd_capdist", "pgd_agri_ih",
    "pgd_pop_gpw_sum", "pgd_ttime_mean",
    "spdist_pgd_diamsec",
    "pgd_pasture_ih", "pgd_savanna_ih", "pgd_forest_ih",
    "pgd_urban_ih", "pgd_barren_ih",
    "pgd_gcp_mer",
]



# Load the previously saved Keras model from the supplemental data folder.
# The cells below only call `model.predict`; nothing in this file trains it.
model = keras.models.load_model("../../supplemental_data/competition_model/model_competition_entry.h5")

# Print the layer-by-layer summary of the loaded model.
model.summary()

gc.collect()

In [None]:
# Do you wish to fetch the latest public data? If so, change False to True and run this cell
# Cells below will fail if this is not run if you haven't imported data yourself yet.

redownload_data = False

if redownload_data:
    path_zip = views.apps.data.public.fetch_latest_zip_from_website(path_dir_destination=views.DIR_SCRATCH)
    views.apps.data.public.import_tables_and_geoms(tables=views.TABLES, geometries=views.GEOMETRIES, path_zip=path_zip)

# Only the geometry and the two join keys are taken from the views dataset;
# every other column comes from the CSV shipped with this archive.
dataset = views.DATASETS["pgm_africa_imp_0"]
df = dataset.gdf
df.reset_index(inplace=True)

update = pd.read_csv("../../data/pgm.csv")
df = pd.merge(df[["geom","pg_id","month_id"]], update, on=["pg_id","month_id"])

# Keep 1990 through 2020, but within 2020 only months 1-8.
in_study_years = (df["year"] > 1989) & (df["year"] < 2021)
before_cutoff = (df["year"] < 2020) | (df["month"] < 9)
# Explicit copy: the columns added below are written to an independent frame
# rather than to a filtered selection (avoids SettingWithCopyWarning).
df = df.loc[in_study_years & before_cutoff].copy()

df["coordx"] = df["geom"].apply(lambda geom: geom.centroid.x)
df["coordy"] = df["geom"].apply(lambda geom: geom.centroid.y)

# Zero-based integer indices. Centroid offsets are doubled before truncation,
# i.e. two index steps per coordinate unit (presumably 0.5-degree cells).
df["col_idx"] = ((df["coordx"] - df["coordx"].min()) * 2).astype("int64")
df["row_idx"] = ((df["coordy"] - df["coordy"].min()) * 2).astype("int64")
df["year_idx"] = (df["year"] - df["year"].min()).astype("int64")
df["month_idx"] = (df["month"] - df["month"].min()).astype("int64")
df["year_month_idx"] = (df["month_id"] - df["month_id"].min()).astype("int64")

# The geometry is only needed for the centroids above.
df.drop("geom", inplace=True, axis=1)


##
## Make Lags
##
lag_keys = ["year_month_idx", "pg_id"]
max_lag = 7

# Snapshot of the outcome column taken before any lag column is merged in.
outcome = df[lag_keys + ["ln_ged_best_sb"]].copy()

# Shifting the time index forward by `lag` and left-joining on (time, cell)
# attaches to each row the outcome observed `lag` months earlier.
for lag in range(1, max_lag + 1):
    shifted = outcome.copy()
    shifted["year_month_idx"] = shifted["year_month_idx"] + lag
    shifted.columns = lag_keys + [f"ln_ged_best_sb_l{lag}"]
    df = pd.merge(df, shifted, how="left", on=lag_keys)

# Change in the outcome relative to each lag; added after all merges so the
# delta columns follow the lag columns.
for lag in range(1, max_lag + 1):
    df[f"delta_{lag}"] = df["ln_ged_best_sb"] - df[f"ln_ged_best_sb_l{lag}"]

# Drop the temporaries so they do not linger in the notebook namespace.
del outcome, shifted, lag, lag_keys, max_lag

gc.collect()

# Identifier and index columns carried alongside the features.
cols_ids = [
    "col_idx", "row_idx",
    "pg_id",
    "year", "month",
    "year_idx", "month_idx", "year_month_idx",
]

# Names of the seven outcome-change columns: delta_1 ... delta_7.
cols_lags = [f"delta_{lag}" for lag in range(1, 8)]

# Keep an untouched copy of the fully prepared frame. The prediction cells
# below start every feature iteration from a fresh copy of it.
df_background = df.copy()

In [None]:
# "drop_in" runs: for one feature at a time, overwrite that feature inside
# the two study regions (pgid_groupa + pgid_groupb) with its overall median,
# rebuild the dense data cube and re-run the model over every month.
for median_var in cols_feats:
    print(median_var)

    df = df_background.copy()
    in_region = df["pg_id"].isin(pgid_groupa + pgid_groupb)
    # The median is evaluated on the unmodified column before assignment.
    df.loc[in_region, median_var] = df[median_var].median()

    subset = df[cols_feats+cols_ids]

    ##
    ## Fill in missing grid cells (e.g. water)
    ##
    all_cells = pd.DataFrame(
        product(
            range(max(subset["year_month_idx"]) + 1),
            range(max(subset["col_idx"]) + 1),
            range(max(subset["row_idx"]) + 1),
        ),
        columns=["year_month_idx", "col_idx", "row_idx"],
    )

    subset = pd.merge(subset, all_cells, how="outer",
                      on=["year_month_idx", "col_idx", "row_idx"])

    # Flag rows with any missing feature (this includes the cells added by
    # the outer join) before the gaps are zero-filled.
    subset["isnan"] = subset[cols_feats].isna().any(axis=1).astype(int)
    subset.fillna(0, inplace=True)

    X_grouped = subset.groupby(["year_month_idx",
                                "col_idx",
                                "row_idx"])[cols_feats+["isnan"]].mean()

    # Dense cube with axes (month, column, row, channel); the last channel
    # is the "isnan" flag.
    arr = X_grouped.values.reshape((len(X_grouped.index.unique(level=0)),
                                    len(X_grouped.index.unique(level=1)),
                                    len(X_grouped.index.unique(level=2)),
                                    len(cols_feats)+1))

    del subset
    gc.collect()

    X = arr[:,:,:,:]
    Y = arr[:,:,:,0]

    # Forward changes in channel 0 at horizons 1..7, padded with NaN at the
    # end of the time axis so every horizon keeps the full length. The
    # padding takes its spatial shape from Y instead of a fixed grid size.
    # NOTE: YDelta is not consumed by the prediction loop below.
    YDelta = np.stack(
        [
            np.concatenate(
                (Y[step:] - Y[:-step],
                 np.full((step,) + Y.shape[1:], np.nan)),
                axis=0,
            )
            for step in range(1, 8)
        ],
        axis=3,
    )
    gc.collect()

    pred_months = 12

    all_preds = []

    # One forward pass per month: the input is a batch of one holding the
    # window of (at most) `pred_months` time slices that ends at `t_end`.
    for t_end in range(X.shape[0]):
        window = X[max(0, t_end - pred_months + 1):(t_end + 1)]
        all_preds.append(np.squeeze(model.predict(np.array([window]))))

    gc.collect()

    # Saving is disabled here. The later cell that assembles the output table
    # loads exactly this file name, so re-enable the line to regenerate it.
    # np.save("../../supplemental_data/feature_dropout/bjr_all_preds_drop_in_"+median_var+".npy", all_preds)

In [None]:
# "drop_out" runs: for one feature at a time, overwrite that feature in every
# cell OUTSIDE the two study regions (pgid_groupa + pgid_groupb) with its
# overall median, rebuild the dense data cube and re-run the model over
# every month.
for median_var in cols_feats:
    print(median_var)

    df = df_background.copy()
    in_region = df["pg_id"].isin(pgid_groupa + pgid_groupb)
    # The median is evaluated on the unmodified column before assignment.
    df.loc[~in_region, median_var] = df[median_var].median()

    subset = df[cols_feats+cols_ids]

    ##
    ## Fill in missing grid cells (e.g. water)
    ##
    all_cells = pd.DataFrame(
        product(
            range(max(subset["year_month_idx"]) + 1),
            range(max(subset["col_idx"]) + 1),
            range(max(subset["row_idx"]) + 1),
        ),
        columns=["year_month_idx", "col_idx", "row_idx"],
    )

    subset = pd.merge(subset, all_cells, how="outer",
                      on=["year_month_idx", "col_idx", "row_idx"])

    # Flag rows with any missing feature (this includes the cells added by
    # the outer join) before the gaps are zero-filled.
    subset["isnan"] = subset[cols_feats].isna().any(axis=1).astype(int)
    subset.fillna(0, inplace=True)

    X_grouped = subset.groupby(["year_month_idx",
                                "col_idx",
                                "row_idx"])[cols_feats+["isnan"]].mean()

    # Dense cube with axes (month, column, row, channel); the last channel
    # is the "isnan" flag.
    arr = X_grouped.values.reshape((len(X_grouped.index.unique(level=0)),
                                    len(X_grouped.index.unique(level=1)),
                                    len(X_grouped.index.unique(level=2)),
                                    len(cols_feats)+1))

    del subset
    gc.collect()

    X = arr[:,:,:,:]
    Y = arr[:,:,:,0]

    # Forward changes in channel 0 at horizons 1..7, padded with NaN at the
    # end of the time axis so every horizon keeps the full length. The
    # padding takes its spatial shape from Y instead of a fixed grid size.
    # NOTE: YDelta is not consumed by the prediction loop below.
    YDelta = np.stack(
        [
            np.concatenate(
                (Y[step:] - Y[:-step],
                 np.full((step,) + Y.shape[1:], np.nan)),
                axis=0,
            )
            for step in range(1, 8)
        ],
        axis=3,
    )
    gc.collect()

    pred_months = 12

    all_preds = []

    # One forward pass per month: the input is a batch of one holding the
    # window of (at most) `pred_months` time slices that ends at `t_end`.
    for t_end in range(X.shape[0]):
        window = X[max(0, t_end - pred_months + 1):(t_end + 1)]
        all_preds.append(np.squeeze(model.predict(np.array([window]))))

    gc.collect()

    # Saving is disabled here. The later cell that assembles the output table
    # loads exactly this file name, so re-enable the line to regenerate it.
    # np.save("../../supplemental_data/feature_dropout/bjr_all_preds_drop_out_"+median_var+".npy", all_preds)

In [None]:
# Stored predictions of the unmodified model; used below as the baseline.
all_preds = np.load("../../supplemental_data/competition_model/competition_entry_predictions.npy")

out_df = df_background[["pg_id","col_idx","row_idx","month_id","year","year_month_idx",
                        "delta_1","delta_2","delta_3","delta_4","delta_5","delta_6","delta_7"]].copy()

# Extra rows for month_id 489..495: one row per distinct grid cell and month.
# The cell list is read from df_background rather than from whatever `df`
# was left behind by an earlier cell, and the repeat count follows the data
# instead of a fixed number of cells.
forecast_month_ids = np.arange(489, 496)
cells = df_background[["pg_id","col_idx","row_idx"]].drop_duplicates()
n_cells = len(cells)

# The concat stacks whole copies of `cells`, so each month id is repeated
# n_cells times in a row to line up with it.
pg_col_row = pd.concat([cells] * len(forecast_month_ids))
pg_col_row["month_id"] = np.repeat(forecast_month_ids, n_cells)

# year_month_idx was defined as month_id minus the smallest month_id in the
# data; reuse that definition so both indices stay consistent.
pg_col_row["year_month_idx"] = pg_col_row["month_id"] - int(out_df["month_id"].min())

print(out_df.shape)
print(pg_col_row.shape)
out_df = pd.concat([out_df,pg_col_row])
# Default reset_index keeps the previous index as a regular "index" column.
out_df.reset_index(inplace=True)
print(out_df.shape)


    
# For every feature, attach to each (cell, month) row of out_df the baseline
# prediction and the matching "drop_in" prediction at steps 1..7. The value
# for step s at month t is read from the prediction array at time index
# t - s, channel s - 1.
for median_var in cols_feats:
    new_preds = np.load("../../supplemental_data/feature_dropout/bjr_all_preds_drop_in_"+median_var+".npy")

    n_rows = out_df.shape[0]
    n_steps = 7

    # One pre-sized list per step; entries stay None where no prediction
    # exists for that row.
    preds_base = [[None] * n_rows for _ in range(n_steps)]
    preds_drop = [[None] * n_rows for _ in range(n_steps)]

    print("\tLoop time, baby...")

    # Pull the three index columns out once as plain Python ints instead of
    # materialising a Series per row with iterrows().
    col_values = out_df["col_idx"].astype(int).tolist()
    row_values = out_df["row_idx"].astype(int).tolist()
    time_values = out_df["year_month_idx"].astype(int).tolist()

    for jj, (col_idx, row_idx, year_month_idx) in enumerate(
            zip(col_values, row_values, time_values)):

        if jj%1000000 == 0:
            print(f"\t{jj} of {n_rows}")
            gc.collect()

        # Step s needs a source month t - s >= 0, hence s <= t.
        for step in range(1, min(year_month_idx, n_steps) + 1):
            source_month = year_month_idx - step
            try:
                preds_base[step-1][jj] = all_preds[source_month, col_idx, row_idx, step-1]
                preds_drop[step-1][jj] = new_preds[source_month, col_idx, row_idx, step-1]
            except IndexError:
                # No stored prediction at this position (e.g. the source
                # month lies past the end of the array): leave it as None.
                pass

    # Baseline columns first, then the per-feature columns.
    for step in range(1, n_steps + 1):
        out_df[f"Radford_clstm_s{step}"] = preds_base[step-1]
    for step in range(1, n_steps + 1):
        out_df[f"Radford_clstm_s{step}_drop_in_"+median_var] = preds_drop[step-1]
    
# For every feature, attach to each (cell, month) row of out_df the baseline
# prediction and the matching "drop_out" prediction at steps 1..7. The value
# for step s at month t is read from the prediction array at time index
# t - s, channel s - 1.
for median_var in cols_feats:
    new_preds = np.load("../../supplemental_data/feature_dropout/bjr_all_preds_drop_out_"+median_var+".npy")

    n_rows = out_df.shape[0]
    n_steps = 7

    # One pre-sized list per step; entries stay None where no prediction
    # exists for that row.
    preds_base = [[None] * n_rows for _ in range(n_steps)]
    preds_drop = [[None] * n_rows for _ in range(n_steps)]

    print("\tLoop time, baby.")

    # Pull the three index columns out once as plain Python ints instead of
    # materialising a Series per row with iterrows().
    col_values = out_df["col_idx"].astype(int).tolist()
    row_values = out_df["row_idx"].astype(int).tolist()
    time_values = out_df["year_month_idx"].astype(int).tolist()

    for jj, (col_idx, row_idx, year_month_idx) in enumerate(
            zip(col_values, row_values, time_values)):

        if jj%1000000 == 0:
            print(f"\t{jj} of {n_rows}")
            gc.collect()

        # Step s needs a source month t - s >= 0, hence s <= t.
        for step in range(1, min(year_month_idx, n_steps) + 1):
            source_month = year_month_idx - step
            try:
                preds_base[step-1][jj] = all_preds[source_month, col_idx, row_idx, step-1]
                preds_drop[step-1][jj] = new_preds[source_month, col_idx, row_idx, step-1]
            except IndexError:
                # No stored prediction at this position (e.g. the source
                # month lies past the end of the array): leave it as None.
                pass

    # Baseline columns first, then the per-feature columns.
    for step in range(1, n_steps + 1):
        out_df[f"Radford_clstm_s{step}"] = preds_base[step-1]
    for step in range(1, n_steps + 1):
        out_df[f"Radford_clstm_s{step}_drop_out_"+median_var] = preds_drop[step-1]

# Writing the combined table is disabled here; re-enable to export it.
# out_df.to_csv("../../data/competition_model/feature_dropout/bjr_all_preds_drop.csv", index=False)
    
