# High Resolution Conflict Forecasting with Spatial Convolutions and Long Short-Term Memory

## Replication Archive

[Benjamin J. Radford](https://www.benradford.com)    
Assistant Professor  
UNC Charlotte  
bradfor7@uncc.edu  

This file produces specially-formatted .csv files for the predictions of the Single Feature Model.

## Imports and seeds

In [None]:
import sys
import os
import gc
import logging

import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load

from itertools import product
from math import isnan

import views
from views import Period, Model, Downsampling
from views.utils.data import assign_into_df
from views.apps.transforms import lib as translib
from views.apps.evaluation import lib as evallib, feature_importance as fi
from views.apps.model import api
from views.apps.extras import extras

import keras
from keras.models import Model
from keras.layers import Input, ConvLSTM2D, Activation, Conv3D, BatchNormalization, Dropout, Bidirectional, GaussianNoise
from keras import optimizers

import tensorflow as tf

import random
import geoplot as gplt
import contextily as ctx

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Pin the global NumPy and TensorFlow random seeds to the same fixed value so
# that repeated runs of this notebook draw the same random numbers.
# NOTE(review): Python's own `random` module (imported above) is not seeded
# here -- confirm nothing downstream relies on it.
from numpy.random import seed
seed(1234)
tf.random.set_seed(1234)

# Set TensorFlow's native-log filter level through its environment variable.
# NOTE(review): tensorflow has already been imported at this point -- confirm
# the variable is still honoured when it is set this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
# Set redownload_data to True and run this cell to fetch the latest public data.
# The cells below fail unless the data has been imported, either by this cell
# or by an import you ran yourself beforehand.
redownload_data = False

if redownload_data:
    # Fetch the latest zip into the views scratch directory, then import the
    # tables and geometries it contains.
    path_zip = views.apps.data.public.fetch_latest_zip_from_website(path_dir_destination=views.DIR_SCRATCH)
    views.apps.data.public.import_tables_and_geoms(tables=views.TABLES, geometries=views.GEOMETRIES, path_zip=path_zip)

# Take the geo-dataframe of the "pgm_africa_imp_0" dataset and move its index
# into ordinary columns (presumably pg_id / month_id -- verify against views).
# reset_index is in place, so it modifies the object returned by dataset.gdf.
dataset = views.DATASETS["pgm_africa_imp_0"]
df = dataset.gdf
df.reset_index(inplace=True)

# Only the geometry and the two key columns are kept from the views data; all
# other columns come from the local update file. pd.merge defaults to an inner
# join, so rows missing from either side are dropped.
update = pd.read_csv("../../data/pgm.csv")
df = pd.merge(df[["geom","pg_id","month_id"]], update, on=["pg_id","month_id"])

In [None]:
def _zero_based_index(values, scale=1):
    """Return ``(values - values.min()) * scale`` truncated to int64."""
    return ((values - values.min()) * scale).astype("int64")


def _add_lags_and_deltas(frame, value_col, n_lags,
                         time_col="year_month_idx", unit_col="pg_id"):
    """Return ``frame`` extended with lagged values and change-since-lag columns.

    For every lag ``k`` in ``1..n_lags`` a column ``{value_col}_l{k}`` is
    left-merged onto ``frame``; it holds, for each ``unit_col``, the value of
    ``value_col`` observed ``k`` periods earlier (NaN where no such row
    exists).  After all lag columns have been added, ``delta_{k}`` columns
    holding ``value_col - {value_col}_l{k}`` are appended, so the column order
    is all lags first, then all deltas.
    """
    keys = [time_col, unit_col]
    # Snapshot the source columns once so every lag is derived from the same
    # rows, independent of what the merges below return.
    source = frame[keys + [value_col]]
    for lag in range(1, n_lags + 1):
        shifted = source.copy()
        # Moving each observation forward by `lag` periods means that, after
        # the merge, period t carries the value observed at period t - lag.
        shifted[time_col] = shifted[time_col] + lag
        shifted.columns = keys + [f"{value_col}_l{lag}"]
        frame = pd.merge(frame, shifted, how="left", on=keys)
    for lag in range(1, n_lags + 1):
        frame[f"delta_{lag}"] = frame[value_col] - frame[f"{value_col}_l{lag}"]
    return frame


# Keep 1990 through August 2020.  The explicit copy makes the filtered frame
# independent of its parent, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
in_year_range = (df["year"] < 2021) & (df["year"] > 1989)
before_sept_2020 = (df["year"] < 2020) | (df["month"] < 9)
df = df.loc[in_year_range & before_sept_2020].copy()

# Cell centroids, then zero-based integer grid positions.
# NOTE(review): the factor 2 assumes cells are spaced 0.5 coordinate units
# apart -- confirm.  The cast truncates, so centroid rounding noise just below
# a half-step would land one index low.
df["coordx"] = df["geom"].apply(lambda x: x.centroid.x)
df["coordy"] = df["geom"].apply(lambda y: y.centroid.y)
df["col_idx"] = _zero_based_index(df["coordx"], scale=2)
df["row_idx"] = _zero_based_index(df["coordy"], scale=2)

# Zero-based time indices relative to the earliest year / month / month_id
# remaining after the filter above.
df["year_idx"] = _zero_based_index(df["year"])
df["month_idx"] = _zero_based_index(df["month"])
df["year_month_idx"] = _zero_based_index(df["month_id"])

df.drop("geom", inplace=True, axis=1)


##
## Make Lags
##
# Adds ln_ged_best_sb_l1..l7 followed by delta_1..delta_7.
df = _add_lags_and_deltas(df, "ln_ged_best_sb", 7)

gc.collect()

In [None]:
# Stored Single Feature Model predictions (a 4-D array, judging by the
# indexing below) and the identifying columns of every observed cell-month.
all_preds = np.load("../../supplemental_data/single_feature/single_feature_predictions.npy")
out_df = df[["pg_id","col_idx","row_idx","month_id","year","year_month_idx"]].copy()

# NOTE(review): `future` is not read by any cell visible here; it is kept in
# case later code uses it.
future = all_preds[-1,:,:,:]

# One row per grid cell, repeated once for each of the seven future months.
pg_col_row = df[["pg_id","col_idx","row_idx"]].drop_duplicates()
n_cells = pg_col_row.shape[0]
future_month_ids = np.arange(489, 496, dtype="int64")
pg_col_row = pd.concat([pg_col_row] * len(future_month_ids))
# The concat stacks whole copies of the cell table, so each month id has to be
# repeated n_cells times in a row.  Deriving the count from the data (instead
# of hard-coding 10677) keeps the lengths in agreement for any grid size.
pg_col_row["month_id"] = np.repeat(future_month_ids, n_cells)
# NOTE(review): 121 is presumably the smallest month_id in df, which is what
# year_month_idx is measured from -- confirm if the year filter ever changes.
pg_col_row["year_month_idx"] = pg_col_row["month_id"] - 121

# Append the future rows; columns they lack (year) are filled with NaN.
print(out_df.shape)
print(pg_col_row.shape)
out_df = pd.concat([out_df,pg_col_row])
print(out_df.shape)

# Moves the (now duplicated) old index into an "index" column and gives every
# row a unique positional label.
out_df.reset_index(inplace=True)

In [None]:
def _lookup_step_predictions(frame, preds, n_steps=7, report_every=100000):
    """Collect, for every row of ``frame``, the stored prediction for each step.

    For step ``k`` (1-based) and a row with time index ``t`` the value read is
    ``preds[t - k, col_idx, row_idx, k - 1]`` -- presumably the k-step-ahead
    forecast issued k periods before ``t`` (verify against the code that wrote
    the array).  Entries stay ``None`` when ``t < k`` or when the lookup falls
    outside ``preds``.

    Parameters
    ----------
    frame : DataFrame with ``col_idx``, ``row_idx`` and ``year_month_idx``.
    preds : 4-D array indexed as described above.
    n_steps : number of steps to collect.
    report_every : print progress (and run the garbage collector) every this
        many rows.

    Returns
    -------
    list of ``n_steps`` lists, each as long as ``frame``; element ``k - 1``
    holds the values for step ``k``.
    """
    n_obs = frame.shape[0]
    per_step = [[None] * n_obs for _ in range(n_steps)]

    # Plain Python ints, read once, instead of building a Series for every
    # row with iterrows.
    cols = frame["col_idx"].astype("int64").tolist()
    rows = frame["row_idx"].astype("int64").tolist()
    times = frame["year_month_idx"].astype("int64").tolist()

    for pos, (col, row, t) in enumerate(zip(cols, rows, times)):
        if pos % report_every == 0:
            print(f"{pos} of {n_obs}")
            gc.collect()

        for step in range(1, n_steps + 1):
            if t < step:
                # No source period exists for this step, nor for longer ones.
                break
            try:
                per_step[step - 1][pos] = preds[t - step, col, row, step - 1]
            except IndexError:
                # The lookup lies outside the stored array; leave None.
                # Only IndexError is caught, so Ctrl-C and genuine bugs are
                # no longer silently swallowed.
                pass

    return per_step


print("Loop time, baby.")

(preds_l1, preds_l2, preds_l3, preds_l4,
 preds_l5, preds_l6, preds_l7) = _lookup_step_predictions(out_df, all_preds, n_steps=7)


In [None]:
# Attach the per-step prediction lists to out_df, one column per step, in
# step order.
step_columns = {
    "Radford_clstm_s1": preds_l1,
    "Radford_clstm_s2": preds_l2,
    "Radford_clstm_s3": preds_l3,
    "Radford_clstm_s4": preds_l4,
    "Radford_clstm_s5": preds_l5,
    "Radford_clstm_s6": preds_l6,
    "Radford_clstm_s7": preds_l7,
}
for column_name, step_values in step_columns.items():
    out_df[column_name] = step_values


In [None]:
# out_df.loc[out_df["year"].isin([2014,2015,2016])][["pg_id",
#     "month_id",
#     "Radford_clstm_s2",
#     "Radford_clstm_s3",
#     "Radford_clstm_s4",
#     "Radford_clstm_s5",
#     "Radford_clstm_s6",
#     "Radford_clstm_s7"]].to_csv("../../data/single_feature/forecasts/updated_ViEWSpred_competition_radford_set3.csv", index=False)

In [None]:
# out_df.loc[out_df["year"].isin([2017,2018,2019])][["pg_id",
#     "month_id",
#     "Radford_clstm_s2",
#     "Radford_clstm_s3",
#     "Radford_clstm_s4",
#     "Radford_clstm_s5",
#     "Radford_clstm_s6",
#     "Radford_clstm_s7"]].to_csv("../../data/single_feature/forecasts/updated_ViEWSpred_competition_radford_set2.csv", index=False)

In [None]:
# out_df.loc[out_df["month_id"].isin([490,491,492,493,494,495])][["pg_id",
#     "month_id",
#     "Radford_clstm_s2",
#     "Radford_clstm_s3",
#     "Radford_clstm_s4",
#     "Radford_clstm_s5",
#     "Radford_clstm_s6",
#     "Radford_clstm_s7"]].to_csv("../../data/single_feature/forecasts/updated_ViEWSpred_competition_radford_set1.csv", index=False)