In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [2]:
# Load the cleaned incident table from the project's data directory.
df = pd.read_csv(filepath_or_buffer='data/clean_df.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,date,incident_id,governorate,district,area,target,main_category,sub_category,min_air_raids,max_air_raids,civilian_casualties,fatalities,woman_fatalities,child_fatalities,injured,woman_injured,child_injured,confirmed_time,time_of_day
0,2015-03-26,1,Sanaa,bani al-harith,Al-Rahabah,Al-Daylami Airbase,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,2,morning
1,2015-03-26,2,Sanaa,bani al-harith,Airport,Sana'a International Airport,infrastructure,transport,1,2.0,0,0,0,0,0,0,0.0,2,morning
2,2015-03-26,3,Sanaa,bani al-harith,Al-Sonblah Neighbourhood,Residential Area,civilian,residential area,1,2.0,29,21,3,14,8,0,7.0,2,morning
3,2015-03-26,4,Sanaa,al-sab'ein,Al-Nahdain,Presidential Palace,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,3,morning
4,2015-03-26,5,Sanaa,al-thawrah,Al-Nahdhah,Former 1st Armoured Division,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,3,morning


In [4]:
# Move the `date` column into the index and parse it to datetimes.
# Rebinding `df` (instead of `inplace=True`) and guarding on the column keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because `date` has already left the columns.
if 'date' in df.columns:
    df = df.set_index('date')
df.index = pd.to_datetime(df.index)

In [5]:
# Preview the first five rows now that the frame is indexed by date.
df.head(n=5)

Unnamed: 0_level_0,incident_id,governorate,district,area,target,main_category,sub_category,min_air_raids,max_air_raids,civilian_casualties,fatalities,woman_fatalities,child_fatalities,injured,woman_injured,child_injured,confirmed_time,time_of_day
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2015-03-26,1,Sanaa,bani al-harith,Al-Rahabah,Al-Daylami Airbase,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,2,morning
2015-03-26,2,Sanaa,bani al-harith,Airport,Sana'a International Airport,infrastructure,transport,1,2.0,0,0,0,0,0,0,0.0,2,morning
2015-03-26,3,Sanaa,bani al-harith,Al-Sonblah Neighbourhood,Residential Area,civilian,residential area,1,2.0,29,21,3,14,8,0,7.0,2,morning
2015-03-26,4,Sanaa,al-sab'ein,Al-Nahdain,Presidential Palace,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,3,morning
2015-03-26,5,Sanaa,al-thawrah,Al-Nahdhah,Former 1st Armoured Division,military_security_target,military site,1,2.0,0,0,0,0,0,0,0.0,3,morning


In [6]:
# One-hot encode the categorical columns: each listed column is replaced by
# one indicator column per distinct value it contains.
categorical_columns = [
    'district',
    'main_category',
    'sub_category',
    'time_of_day',
    'confirmed_time',
]
df = pd.get_dummies(df, columns=categorical_columns)

In [7]:
# Preview the first five rows after one-hot encoding.
df.head(n=5)

Unnamed: 0_level_0,incident_id,governorate,area,target,min_air_raids,max_air_raids,civilian_casualties,fatalities,woman_fatalities,child_fatalities,...,confirmed_time_23,confirmed_time_24,confirmed_time_3,confirmed_time_4,confirmed_time_5,confirmed_time_6,confirmed_time_7,confirmed_time_8,confirmed_time_9,confirmed_time_Unknown
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-03-26,1,Sanaa,Al-Rahabah,Al-Daylami Airbase,1,2.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2015-03-26,2,Sanaa,Airport,Sana'a International Airport,1,2.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2015-03-26,3,Sanaa,Al-Sonblah Neighbourhood,Residential Area,1,2.0,29,21,3,14,...,0,0,0,0,0,0,0,0,0,0
2015-03-26,4,Sanaa,Al-Nahdain,Presidential Palace,1,2.0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2015-03-26,5,Sanaa,Al-Nahdhah,Former 1st Armoured Division,1,2.0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [8]:
# Number of distinct governorate values (missing values, if any, count as one).
df['governorate'].nunique(dropna=False)

22

In [29]:
# Store the governorate as a categorical column named `label`.
df['label'] = df['governorate'].astype('category')

In [31]:
# Display the categorical label column.
df.loc[:, 'label']

date
2015-03-26    Sanaa
2015-03-26    Sanaa
2015-03-26    Sanaa
2015-03-26    Sanaa
2015-03-26    Sanaa
              ...  
2020-12-31    Sanaa
2020-12-31    Sanaa
2020-12-31    Sanaa
2020-12-31    Sanaa
2020-12-31    Marib
Name: label, Length: 22485, dtype: category
Categories (22, object): ['Abyan', 'Aden', 'Amran', 'Bayda', ..., 'Sanaa', 'Shabwa', 'Taiz', 'lahj']

In [32]:
# Encode each governorate as its integer category code.
# The codes are derived from `governorate` rather than from the current
# contents of `label`, so this cell can be re-run safely: the previous
# self-referential form failed on a second execution because `label` was
# already an integer column without a `.cat` accessor.
df['label'] = df['governorate'].astype('category').cat.codes

In [33]:
# Display the integer-coded label column.
df.loc[:, 'label']

date
2015-03-26    18
2015-03-26    18
2015-03-26    18
2015-03-26    18
2015-03-26    18
              ..
2020-12-31    18
2020-12-31    18
2020-12-31    18
2020-12-31    18
2020-12-31    14
Name: label, Length: 22485, dtype: int8

In [38]:
# Target: the integer governorate code of each incident.
y = df['label']

# Features: everything except the free-text columns and the target itself.
# `label` must be dropped here too; otherwise the quantity being predicted is
# fed back into the model as a standard-scaled ordinal feature.
# NOTE(review): `incident_id` looks like a running identifier rather than a
# meaningful feature -- confirm whether it should also be excluded.
X = df.drop(columns=['area', 'target', 'governorate', 'label'])

In [39]:
# Hold-out split without shuffling: rows keep their original order, so the
# test set is the trailing portion of the frame.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    random_state=6,
)

In [40]:
# Fit the scaler on the training rows only, then apply that same transform to
# both splits so the test rows do not influence the scaling statistics.
ss = StandardScaler()
ss.fit(X_train)
X_train_ss = ss.transform(X_train)
X_test_ss = ss.transform(X_test)

In [None]:
# --- Sequence windows --------------------------------------------------------
SEQ_LENGTH = 3    # consecutive rows fed to the network per sample
BATCH_SIZE = 64

# Targets are passed as plain NumPy arrays: the generator looks targets up by
# integer position, while y_train / y_test are Series indexed by date.
train_sequences = TimeseriesGenerator(
    X_train_ss, y_train.to_numpy(), length=SEQ_LENGTH, batch_size=BATCH_SIZE
)
test_sequences = TimeseriesGenerator(
    X_test_ss, y_test.to_numpy(), length=SEQ_LENGTH, batch_size=BATCH_SIZE
)

# Shape of a single sample: (timesteps, features).
input_shape = train_sequences[0][0][0].shape

# Labels are category codes starting at 0, so the class count is max code + 1.
n_classes = int(max(y_train.max(), y_test.max())) + 1

# --- Model -------------------------------------------------------------------
model = Sequential()
model.add(GRU(64, input_shape=input_shape, return_sequences=True))  # True: next layer is an RNN
model.add(GRU(32, return_sequences=False))  # False: next layer is Dense
model.add(Dense(32, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))

# The targets are integer class codes (not one-hot vectors), so the sparse
# variant of categorical cross-entropy is required.
# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(
    optimizer=Adam(learning_rate=.0005),
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

history = model.fit(train_sequences, validation_data=test_sequences, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50