# EXPLORATORY DATA ANALYSIS

In [1]:
import sqlite3
import pandas as pd
import numpy as np

In [2]:
from pathlib import Path

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which then fails later with a misleading "no such table" error.
# Fail early with the resolved path so a wrong working directory is obvious.
acled_db_path = Path("../data/01_ACLED.db")
if not acled_db_path.is_file():
    raise FileNotFoundError(
        f"ACLED database not found at {acled_db_path.resolve()}; "
        "check the notebook's working directory."
    )

# NOTE(review): the connection is intentionally left open in case later cells
# reuse `conn`; call conn.close() once nothing else needs it.
conn = sqlite3.connect(acled_db_path)

# Load the monthly country-level (ADM0) table, ordered by country code and month.
acled_monthly_adm0 = pd.read_sql_query("""
    SELECT * FROM TB005_ACLED_MONTHLY_ADM0_TIME_SERIES ORDER BY GID_0, EVENT_DATE_MONTH;
""", conn)

# Add a datetime index built from EVENT_DATE_MONTH. Passing an Index object
# (rather than the column name) keeps EVENT_DATE_MONTH available as a regular
# column, which the plotting cells below rely on.
acled_monthly_adm0 = acled_monthly_adm0.set_index(pd.DatetimeIndex(acled_monthly_adm0["EVENT_DATE_MONTH"]))

# Preview only the first rows instead of dumping the whole frame.
acled_monthly_adm0.head()

DatabaseError: Execution failed on sql '
    SELECT * FROM TB005_ACLED_MONTHLY_ADM0_TIME_SERIES ORDER BY GID_0, EVENT_DATE_MONTH;
': no such table: TB005_ACLED_MONTHLY_ADM0_TIME_SERIES

## Seasonal Decomposition

In [None]:
import plotly.express as px

# Countries (by GID_0 code) compared throughout the rest of this section.
country_selection = ["AFG", "IRN", "MLI", "BFA", "NGA", "SYR"]

# Interactive line chart of monthly fatalities, one coloured line per selected country.
in_selection = acled_monthly_adm0["GID_0"].isin(country_selection)
fig = px.line(
    acled_monthly_adm0[in_selection],
    x="EVENT_DATE_MONTH",
    y="SUM(FATALITIES)",
    title="Monthly Fatalities",
    color="GID_0",
)
fig.show()

In [None]:
# matplotlib is otherwise only imported by a later cell, so on a fresh kernel
# (Restart & Run All) this cell would fail with NameError on `plt`.
import matplotlib.pyplot as plt

# Small multiples: one panel of monthly fatalities per selected country (3 x 2 grid).
fig, axes = plt.subplots(nrows=3, ncols=2, dpi=120, figsize=(10,6))

# zip pairs each panel with its country and stops at the shorter sequence, so a
# country_selection with fewer than six entries no longer raises IndexError.
for ax, country in zip(axes.flatten(), country_selection):
    data = acled_monthly_adm0.loc[acled_monthly_adm0["GID_0"] == country, "SUM(FATALITIES)"]
    ax.plot(data, color='red', linewidth=1)
    # Decorations: title per country, no tick marks, hidden top spine, small labels
    ax.set_title(country)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.spines["top"].set_alpha(0)
    ax.tick_params(labelsize=6)
plt.tight_layout()

In [None]:
# Imports for the decomposition (all original imports kept; later cells may rely on them).
from random import randrange

import matplotlib.pyplot as plt
import seaborn as sns
from pandas import Series
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.seasonal import seasonal_decompose

# Global plot styling: pandas date converters, seaborn dark grid, larger figures and font.
register_matplotlib_converters()
sns.set_style("darkgrid")
plt.rc("figure", figsize=(16, 12))
plt.rc("font", size=13)

# Fatalities series for Mali (GID_0 == "MLI"), indexed by the frame's datetime index.
is_mali = acled_monthly_adm0["GID_0"] == "MLI"
series = acled_monthly_adm0.loc[is_mali, "SUM(FATALITIES)"]
print(series)

# Additive decomposition with a period of 12 observations
# (presumably one year of monthly data - confirm the table has no missing months).
result = seasonal_decompose(series, model='additive', period=12)
result.plot()
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation of `series` (set in the decomposition cell) for lags up to 48.
max_lag = 48
plot_pacf(series, lags=max_lag)
plt.show()

In [None]:
# Re-select the Mali fatalities series so this cell does not depend on the
# decomposition cell having been run.
series = acled_monthly_adm0.loc[acled_monthly_adm0["GID_0"] == "MLI", "SUM(FATALITIES)"]

# Summary statistics. describe() on a Series already returns a Series, and
# transposing a Series is a no-op, so the redundant .transpose() was dropped.
series.describe()