### Forcasting - Syria

In [1]:
import pandas as pd 
import numpy as np 

import matplotlib.pyplot as plt 
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Flatten
from keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import regularizers


pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

Using TensorFlow backend.


In [2]:
df = pd.read_csv('data/conflict-clusters.csv')

In [4]:
# Locating Syrian Events
data = df.loc[df['country'] == 'Syria', :]

In [6]:
data.shape

(65966, 58)

In [7]:
data.drop(columns=['year', 
                   'sub_event_type',
                   'actor1',
                   'assoc_actor_1',
                   'inter1',
                   'actor2',
                   'assoc_actor_2',
                   'inter2',
                   'interaction',
                   'region',
                   'admin2',
                   'admin3',
                   'location',
                   'timestamp',
                   'country',
                   'MA/CU'], inplace=True)

In [8]:
#  Calculating weekly event counts
df = (pd.to_datetime(data['event_date']).dt.floor('d').value_counts().rename_axis('date').reset_index(name='count'))

In [10]:
data['event_date'] = pd.to_datetime(data['event_date'])

In [11]:
# Merging Syrian events and weekly event counts
data = pd.merge(data, df, left_on='event_date', right_on='date', how='outer')

In [13]:
data.set_index('event_date', inplace=True)

In [14]:
data.drop(columns='date', inplace=True)

In [15]:
pd.get_dummies(data=data, 
               columns=['event_type',
                        'admin1'])

Unnamed: 0_level_0,latitude,longitude,geo_precision,fatalities,users,frac,month,missing_data,Exchange rate USD,Unemployment rate,Rule of law index (-2.5 weak; 2.5 strong),Government effectiveness index (-2.5 weak; 2.5 strong),Control of corruption (-2.5 weak; 2.5 strong),Regulatory quality index (-2.5 weak; 2.5 strong),Voice and accountability index (-2.5 weak; 2.5 strong),Political stability index (-2.5 weak; 2.5 strong),Corruption Perceptions Index 100 = no corruption,Political rights index 7 (weak) - 1 (strong),Civil liberties index 7 (weak) - 1 (strong),Short-term political risk (1=low 7=high),Medium/long-term political risk (1=low 7=high),Political violence risk (1=low 7=high),Mobile phone subscribers per 100 people,Property rights index (0-100),Freedom from corruption index (0-100),Business freedom index (0-100),Labor freedom index (0-100),Life expectancy in years,Fragile state index 0 (low) - 120 (high),Security threats index 0 (low) - 10 (high),State legitimacy index 0 (high) - 10 (low),Public services index 0 (high) - 10 (low),Human rights and rule of law index 0 (high) - 10 (low),Refugees and displaced persons index 0 (low) - 10 (high),Labor force million people,Labor force participation rate,Percent urban population,Population density people per square km,db_50_clusters,count,event_type_Battles,event_type_Explosions/Remote violence,event_type_Protests,event_type_Riots,event_type_Violence against civilians,admin1_Al-Hasakeh,admin1_Aleppo,admin1_Ar-Raqqa,admin1_As-Sweida,admin1_Damascus,admin1_Dar'a,admin1_Deir-ez-Zor,admin1_Hama,admin1_Homs,admin1_Idleb,admin1_Lattakia,admin1_Quneitra,admin1_Rural Damascus,admin1_Tartous
event_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
2017-01-01,33.6039,36.1361,1,2,38,56,1,1.0,213.9760,8.36,-2.09,-1.77,-1.560,-1.830,-1.970,-2.62,14,7,7,7,7,7,90.00,37,30,66,56,70.97,110.6,9.8,9.9,9.2,9.8,9.8,4.83,41.52,53.50,93.0,1,72,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2017-01-01,33.5165,36.4897,1,1,38,56,1,1.0,213.9760,8.36,-2.09,-1.77,-1.560,-1.830,-1.970,-2.62,14,7,7,7,7,7,90.00,37,30,66,56,70.97,110.6,9.8,9.9,9.2,9.8,9.8,4.83,41.52,53.50,93.0,1,72,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2017-01-01,33.6094,36.1234,2,0,38,56,1,1.0,213.9760,8.36,-2.09,-1.77,-1.560,-1.830,-1.970,-2.62,14,7,7,7,7,7,90.00,37,30,66,56,70.97,110.6,9.8,9.9,9.2,9.8,9.8,4.83,41.52,53.50,93.0,1,72,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2017-01-01,34.8832,38.8700,2,0,38,56,1,1.0,213.9760,8.36,-2.09,-1.77,-1.560,-1.830,-1.970,-2.62,14,7,7,7,7,7,90.00,37,30,66,56,70.97,110.6,9.8,9.9,9.2,9.8,9.8,4.83,41.52,53.50,93.0,1,72,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2017-01-01,34.9568,36.9580,1,0,38,56,1,1.0,213.9760,8.36,-2.09,-1.77,-1.560,-1.830,-1.970,-2.62,14,7,7,7,7,7,90.00,37,30,66,56,70.97,110.6,9.8,9.9,9.2,9.8,9.8,4.83,41.52,53.50,93.0,1,72,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31,36.5111,36.8655,1,0,17,55,12,1.0,514.9994,8.37,-2.07,-1.72,-1.595,-1.815,-1.965,-2.68,13,7,7,7,7,7,95.55,37,20,50,58,71.38,111.5,9.8,9.9,9.4,10.0,10.0,4.80,40.76,53.83,92.5,1,33,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2019-12-31,36.6978,38.9548,1,0,17,55,12,1.0,514.9994,8.37,-2.07,-1.72,-1.595,-1.815,-1.965,-2.68,13,7,7,7,7,7,95.55,37,20,50,58,71.38,111.5,9.8,9.9,9.4,10.0,10.0,4.80,40.76,53.83,92.5,1,33,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2019-12-31,35.6159,36.7188,1,0,17,55,12,1.0,514.9994,8.37,-2.07,-1.72,-1.595,-1.815,-1.965,-2.68,13,7,7,7,7,7,95.55,37,20,50,58,71.38,111.5,9.8,9.9,9.4,10.0,10.0,4.80,40.76,53.83,92.5,1,33,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2019-12-31,35.6045,36.7115,1,0,17,55,12,1.0,514.9994,8.37,-2.07,-1.72,-1.595,-1.815,-1.965,-2.68,13,7,7,7,7,7,95.55,37,20,50,58,71.38,111.5,9.8,9.9,9.4,10.0,10.0,4.80,40.76,53.83,92.5,1,33,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [16]:
data.drop(columns=['event_type', 'admin1'], inplace=True)

In [18]:
y = data['count']
X = data.drop(columns=['count'])


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

In [20]:
minmax = MinMaxScaler()

In [21]:
# Normalizing numeric data
X_train = minmax.fit_transform(X_train)
X_test = minmax.fit_transform(X_test)

In [22]:
X_train.shape

(59369, 39)

In [23]:
train = TimeseriesGenerator(X_train, y_train, length=3, batch_size=64)
test = TimeseriesGenerator(X_test, y_test, length=3, batch_size=64)

In [24]:
batch_x, batch_y = train[0]

In [25]:
batch_x.shape

(64, 3, 39)

In [26]:
# The LSTM design
model = Sequential()
# First LSTM layer with Dropout 
model.add(LSTM(64, activation='relu', return_sequences=True, input_shape=(3, 39)))
model.add(Dropout(0.5))
# Second LSTM layer
model.add(LSTM(units=64, activation='relu', return_sequences=True))
model.add(Dropout(0.5))
# Third LSTM layer
model.add(LSTM(units=64, activation='relu', return_sequences=True))
model.add(Dropout(0.5))
# The output layer

model.add(Flatten())
model.add(Dense(1))

# Compiling the RNN
model.compile(optimizer='Adam', loss='mean_absolute_percentage_error')
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 3, 64)             26624     
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 64)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 3, 64)             33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 3, 64)             0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 3, 64)             33024     
_________________________________________________________________
dropout_3 (Dropout)          (None, 3, 64)             0         
________________________________________________

In [27]:
# Fitting to the training set
hist = model.fit_generator(train, validation_data=(test), epochs=20)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
125/928 [===>..........................] - ETA: 6s - loss: 25.3986

KeyboardInterrupt: 

In [None]:

plt.style.use('ggplot')
plt.plot(hist.history['loss'], label='Train loss')
plt.plot(hist.history['val_loss'], label='Test loss')
plt.title('Syria Mean Absolute Percentage Error')
plt.xlabel('Epochs')
plt.ylabel('Percentage Error')
plt.legend()
plt.savefig('imgs/syrialoss.jpeg');

In [None]:
syria_preds = model.predict(test)

In [None]:
plt.style.use('ggplot')

In [None]:
def plot_predictions(test,predicted):
    plt.plot(test, label='True Values')
    plt.plot(predicted, label='Predicted Values')
    plt.title('Syria Forecast')
    plt.xlabel('Time')
    plt.ylabel('Weekly Count of Events')
    plt.legend()
    plt.savefig('imgs/syriaforecast.jpeg');

In [None]:
plot_predictions(y_test.values, syria_preds)

In [73]:
y_test.shape

(16492,)

In [65]:
len(preds)

16491