In [17]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pylab import rcParams
import itertools
from copy import deepcopy

In [18]:
from warnings import filterwarnings

# Suppress every warning for the rest of the session: no category or module
# filter is passed, so the 'ignore' action applies to all of them.
filterwarnings('ignore')

In [19]:
# Notebook-wide pandas display settings.
# Floats are rendered with exactly two decimal places.
pd.options.display.float_format = lambda value: '%.2f' % value
# None removes the cap on how many columns are shown.
pd.options.display.max_columns = None
# Left disabled: pd.set_option('display.max_colwidth', 1000)

In [20]:
# Load the per-date table and reduce it to the series that gets forecast.
# NOTE(review): pickle files can execute code when loaded; only read pickles
# produced by this project.
df_date = pd.read_pickle('Pickles/df_date_2.pkl')
# creating a field for victims not including those "unharmed"
df_date['num_harmed'] = df_date['num_killed'] + df_date['num_injured']
# Keep rows dated 2014 or later and only the two columns used downstream.
# A boolean-mask .loc selection already yields a new frame, and
# reset_index(drop=True) renumbers rows 0..n-1 without creating an 'index'
# column that would then have to be dropped again.
df_date = (
    df_date
    .loc[df_date['date'].dt.year >= 2014, ['date', 'num_harmed']]
    .reset_index(drop=True)
)

In [21]:
# Evaluates to True only when every column's missing-value count is zero.
null_counts_per_column = df_date.isnull().sum()
set(null_counts_per_column.tolist()) == {0}

True

In [22]:
# Number of rows per calendar year (value_counts orders by count, largest first).
df_date['date'].dt.year.value_counts()

2016    366
2017    365
2015    365
2014    365
2018     90
Name: date, dtype: int64

In [23]:
# Forecast horizon, in days, for the model trained on data up to each year.
# Every model should predict through the same end date: the last observed
# date plus FORECAST_DAYS_BEYOND_DATA.
FORECAST_DAYS_BEYOND_DATA = 365
last_observed_date = df_date['date'].max()

future_dataframe_lengths = {}
for year in df_date['date'].dt.year.unique():
    # Last date the model for `year` is trained on. Prophet counts `periods`
    # from the last training date, so the horizon is measured from here
    # instead of relying on a bare `except` to cover the final year.
    # NOTE(review): assumes 'date' holds midnight-normalised daily timestamps,
    # otherwise `.days` truncates partial days -- confirm.
    training_end_date = df_date.loc[df_date['date'].dt.year <= year, 'date'].max()
    days_already_observed = (last_observed_date - training_end_date).days
    future_dataframe_lengths[year] = days_already_observed + FORECAST_DAYS_BEYOND_DATA

print(future_dataframe_lengths)

{2014: 1551, 2015: 1186, 2016: 820, 2017: 455, 2018: 365}


In [24]:
# Fit one Prophet model per cutoff year (trained on all rows up to and
# including that year) and collect each model's predictions as a
# 'pred_<year>' column next to the observed series in d3_data.

# The loop passes Prophet a frame with 'ds' and 'y' columns. Build that
# renamed copy once instead of renaming df_date in place and back on every
# iteration, which left df_date with the wrong column names whenever fit or
# predict raised part-way through.
prophet_input = df_date.rename(columns={'date': 'ds', 'num_harmed': 'y'})

# Seed the accumulator with the observed data so every iteration is the same
# outer merge; the copy keeps d3_data independent of df_date.
d3_data = df_date.copy()
for year in df_date['date'].dt.year.unique():
    prediction_column = 'pred_' + str(year)
    harmed_prophet = Prophet(changepoint_prior_scale = 0.5, daily_seasonality = True, yearly_seasonality = True)
    harmed_prophet.fit(prophet_input[prophet_input['ds'].dt.year <= year])
    # Training dates plus the per-year horizon computed earlier.
    future_dates = harmed_prophet.make_future_dataframe(periods=future_dataframe_lengths[year], freq='D')
    harmed_forecast = harmed_prophet.predict(future_dates).rename(
        columns={'ds': 'date', 'yhat': prediction_column}
    )
    # The newest prediction goes on the left, so columns end up ordered
    # date, latest pred ... earliest pred, num_harmed.
    d3_data = pd.merge(harmed_forecast[['date', prediction_column]], d3_data, on = 'date', how='outer')

In [25]:
# Smooth each prediction column: keep only every KNOT_SPACING-th row as a
# knot, rebuild the rows in between with cubic interpolation, then
# forward-fill whatever is still missing after the last knot.
# NOTE(review): assumes d3_data still carries the default 0..n-1 index left
# by the merge, so row positions and row labels coincide -- confirm.
KNOT_SPACING = 10
is_knot_row = np.arange(len(d3_data)) % KNOT_SPACING == 0
keep_index = list(d3_data.index[is_knot_row].values)

for column in d3_data.columns:
    if 'pred' in column:
        # Vectorised blanking of the non-knot rows; replaces a per-row Python
        # loop that searched a list on every iteration.
        knots_only = d3_data[column].where(is_knot_row)
        d3_data[column] = knots_only.interpolate(method = 'cubic').ffill()

In [26]:
# Show the last 10 rows of the merged frame.
d3_data.tail(10)

Unnamed: 0,date,pred_2018,pred_2017,pred_2016,pred_2015,pred_2014,num_harmed
1906,2019-03-22,120.82,115.49,234.21,181.68,102.6,
1907,2019-03-23,115.88,110.54,228.65,176.66,97.61,
1908,2019-03-24,108.58,103.19,220.66,169.42,90.48,
1909,2019-03-25,98.67,93.19,210.01,159.72,80.98,
1910,2019-03-26,85.91,80.31,196.46,147.36,68.91,
1911,2019-03-27,85.91,80.31,196.46,147.36,68.91,
1912,2019-03-28,85.91,80.31,196.46,147.36,68.91,
1913,2019-03-29,85.91,80.31,196.46,147.36,68.91,
1914,2019-03-30,85.91,80.31,196.46,147.36,68.91,
1915,2019-03-31,85.91,80.31,196.46,147.36,68.91,


In [27]:
# harmed_forecast.columns

In [28]:
# harmed_prophet.changepoints[:5]

In [29]:
# exporting harmed_forecast predictions to assets repository for d3.js visualization

# Attach the incident count for each date. The selected columns are already
# named 'date' and 'num_incidents', so no rename is needed before the merge.
num_incidents_df = pd.read_pickle('Pickles/df_date_2.pkl')[['date', 'num_incidents']]
d3_data = pd.merge(d3_data, num_incidents_df, on = 'date', how='left')
# reset_index() keeps the previous row labels as an 'index' column; that
# column is used just below and is written to the CSV.
d3_data = d3_data.reset_index()

# Flag every row from the first date without an observed num_harmed onward.
first_unobserved_date = d3_data.loc[d3_data['num_harmed'].isnull(), 'date'].min()
first_unobserved_row = d3_data.index[d3_data['date'] == first_unobserved_date][0]
d3_data.loc[d3_data['index'] >= first_unobserved_row, 'non_observation'] = 1
d3_data['year'] = d3_data['date'].dt.year

# Mark each January 1st, then replace the marker with a running ordinal
# (0 for the first New Year's Day in the frame, 1 for the next, ...).
is_new_years_day = (d3_data['date'].dt.day==1) & (d3_data['date'].dt.month==1)
d3_data.loc[is_new_years_day, 'nyd'] = 1
for ordinal, row_label in enumerate(d3_data.index[d3_data['nyd']==1]):
    d3_data.loc[row_label, 'nyd'] = ordinal

# The row index is written as well (to_csv default), giving the file an
# unnamed leading column.
d3_data.to_csv('../cyaris.github.io/assets/us_harmed_victim_forecast_data.csv')

In [30]:
# List the columns of the exported frame.
d3_data.columns

Index(['index', 'date', 'pred_2018', 'pred_2017', 'pred_2016', 'pred_2015',
       'pred_2014', 'num_harmed', 'num_incidents', 'non_observation', 'year',
       'nyd'],
      dtype='object')

In [31]:
# Show the first 3 rows of the exported frame.
d3_data.head(3)

Unnamed: 0,index,date,pred_2018,pred_2017,pred_2016,pred_2015,pred_2014,num_harmed,num_incidents,non_observation,year,nyd
0,0,2014-01-01,88.05,90.3,93.17,94.8,96.13,182.0,198.0,,2014,0.0
1,1,2014-01-02,97.27,99.43,102.44,103.68,105.65,77.0,107.0,,2014,
2,2,2014-01-03,104.69,106.74,109.83,110.74,113.19,96.0,120.0,,2014,
