In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from copy import deepcopy

In [2]:
# Silence every warning category for the rest of the kernel session so that
# library warnings do not clutter cell output.
# NOTE(review): this blanket filter also hides any pandas warnings raised by
# later cells — confirm that is intended.
from warnings import filterwarnings

filterwarnings('ignore')

In [3]:
# Notebook-wide pandas display settings: render floats with two decimal places
# and never truncate the column list when a wide frame is displayed.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_columns = None
# pd.set_option('display.max_colwidth', 1000)

In [4]:
# Load the per-day frame and keep only the date plus the number of victims
# harmed, i.e. killed + injured (victims recorded as "unharmed" are excluded).
# NOTE(review): unpickling can execute arbitrary code — only load pickles
# produced by this project.
df_date = (
    pd.read_pickle('Pickles/df_date_2.pkl')
    .assign(num_harmed=lambda daily: daily['num_killed'] + daily['num_injured'])
    [['date', 'num_harmed']]
)

In [5]:
# Sanity check: every column of df_date must have a null count of exactly zero.
null_counts_per_column = df_date.isnull().sum()
assert set(null_counts_per_column.tolist()) == {0}

In [6]:
# Number of rows per calendar year; the displayed counts show which years are
# fully covered and that the final year is only partially observed.
df_date['date'].dt.year.value_counts()

2016    366
2018    365
2017    365
2015    365
2014    365
2019     70
Name: date, dtype: int64

In [7]:
# Forecast horizon, in days, for a model trained on data up to and including
# each calendar year: the number of days from the first observation of the
# following year through the last observed date, plus 365 further days.
# A year with no following-year data (the final year) gets 365 days only.
forecast_extension_days = 365
last_observed_date = df_date['date'].max()

future_dataframe_lengths = {}
for year in df_date['date'].dt.year.unique():
    next_year_dates = df_date.loc[df_date['date'].dt.year == year + 1, 'date']
    if next_year_dates.empty:
        # Explicit check instead of a bare `except:` so that genuine errors
        # (e.g. a missing 'date' column) surface instead of silently yielding 365.
        future_dataframe_lengths[year] = forecast_extension_days
    else:
        observed_days_after_year = len(pd.period_range(next_year_dates.min(), last_observed_date))
        future_dataframe_lengths[year] = observed_days_after_year + forecast_extension_days

print(future_dataframe_lengths)

{2014: 1896, 2015: 1531, 2016: 1165, 2017: 800, 2018: 435, 2019: 365}


In [8]:
# For every calendar year in the data, fit a Prophet model on all observations
# up to and including that year, forecast `future_dataframe_lengths[year]` days
# past the training data, and collect per-year columns in `d3_data`:
#   pred_<year>_smooth_0       the model's yhat
#   yearly_trend_calc_<year>   yhat minus yhat 365 rows earlier
# Each year's columns are merged in front of the ones already collected, so the
# most recent year ends up left-most, followed finally by `num_harmed`.
#
# Prophet expects the columns to be named 'ds' and 'y'. A renamed copy is used
# so that `df_date` itself is never left half-renamed if fitting fails mid-loop.
# NOTE(review): daily_seasonality=True is kept from the original; with one row
# per day it presumably has little effect — confirm before changing.
prophet_input = df_date.rename(columns={'date': 'ds', 'num_harmed': 'y'})

# Seeding the accumulator with the observed data lets every iteration use the
# same pair of merges (the first iteration previously had its own branch).
d3_data = df_date
for year in df_date['date'].dt.year.unique():
    pred_column = 'pred_' + str(year) + '_smooth_0'
    trend_column = 'yearly_trend_calc_' + str(year)

    harmed_prophet = Prophet(changepoint_prior_scale = 0.5, daily_seasonality = True, yearly_seasonality = True)
    harmed_prophet.fit(prophet_input[prophet_input['ds'].dt.year <= year])
    harmed_forecast = harmed_prophet.make_future_dataframe(periods=future_dataframe_lengths[year], freq='D')
    harmed_forecast = harmed_prophet.predict(harmed_forecast)
    harmed_forecast[trend_column] = harmed_forecast['yhat'] - harmed_forecast['yhat'].shift(periods=365)
    harmed_forecast = harmed_forecast.rename(columns={'ds': 'date', 'yhat': pred_column})

    # Outer merges keep forecast-only dates (no observation) as well as
    # observed dates.
    d3_data = pd.merge(harmed_forecast[['date', pred_column]], d3_data, on = 'date', how='outer')
    d3_data = pd.merge(harmed_forecast[['date', trend_column]], d3_data, on = 'date', how='outer')

In [9]:
# Row count of the merged frame (observed dates plus forecast-only dates).
len(d3_data)

2261

In [10]:
# Build smoothed variants of every `pred_<year>_smooth_0` column. For each
# rate in 5, 10, 15, 20: keep only every rate-th row of the prediction as an
# anchor, blank the rows in between, re-fill them with cubic interpolation,
# and store the result as `pred_<year>_smooth_<rate>`.
column_list = ['date', 'num_harmed'] + [column for column in d3_data.columns if "smooth_0" in column]
pred_columns = [column for column in column_list if 'pred' in column]

for rate_value in np.arange(5, 25, 5):
    d3_data_calc = d3_data[column_list].copy()

    # Positional mask: True for every row that is NOT an anchor row. Built once
    # per rate instead of testing list membership and writing one cell at a
    # time for every row of every column.
    discard_rows = np.ones(len(d3_data_calc), dtype=bool)
    discard_rows[::rate_value] = False

    smoothed_names = {}
    for column in pred_columns:
        anchors_only = d3_data_calc[column].mask(discard_rows)
        # Interpolation leaves rows after the last anchor empty; ffill carries
        # the last interpolated value forward over them.
        d3_data_calc[column] = anchors_only.interpolate(method = 'cubic').ffill()
        # Swap the trailing "0" of "..._smooth_0" for the current rate.
        smoothed_names[column] = str(column)[:-1] + str(rate_value)

    d3_data_calc = d3_data_calc.rename(columns = smoothed_names)
    d3_data = pd.merge(d3_data, d3_data_calc, on = ['date', 'num_harmed'])

In [11]:
# Inspect the last rows: these are forecast-only dates (num_harmed is empty),
# shown with every smoothed prediction column.
d3_data.tail(5)

Unnamed: 0,date,yearly_trend_calc_2019,pred_2019_smooth_0,yearly_trend_calc_2018,pred_2018_smooth_0,yearly_trend_calc_2017,pred_2017_smooth_0,yearly_trend_calc_2016,pred_2016_smooth_0,yearly_trend_calc_2015,pred_2015_smooth_0,yearly_trend_calc_2014,pred_2014_smooth_0,num_harmed,pred_2019_smooth_5,pred_2018_smooth_5,pred_2017_smooth_5,pred_2016_smooth_5,pred_2015_smooth_5,pred_2014_smooth_5,pred_2019_smooth_10,pred_2018_smooth_10,pred_2017_smooth_10,pred_2016_smooth_10,pred_2015_smooth_10,pred_2014_smooth_10,pred_2019_smooth_15,pred_2018_smooth_15,pred_2017_smooth_15,pred_2016_smooth_15,pred_2015_smooth_15,pred_2014_smooth_15,pred_2019_smooth_20,pred_2018_smooth_20,pred_2017_smooth_20,pred_2016_smooth_20,pred_2015_smooth_20,pred_2014_smooth_20
2256,2020-03-06,0.92,83.45,-4.5,72.6,-11.46,50.51,37.39,215.95,22.48,165.15,9.56,68.98,,70.29,58.77,35.95,201.12,151.27,50.83,115.9,104.97,83.76,248.33,195.33,96.09,108.51,97.32,75.87,236.83,185.51,90.26,82.37,70.19,48.52,210.3,158.18,60.7
2257,2020-03-07,23.94,113.22,18.05,102.47,11.03,80.87,60.15,247.25,44.03,194.3,24.97,95.61,,66.67,55.1,32.24,197.81,147.73,46.76,111.73,100.73,79.44,244.49,191.39,91.56,108.51,97.32,75.87,236.83,185.51,90.26,83.01,70.98,49.27,212.08,159.78,61.45
2258,2020-03-08,4.28,123.36,-1.55,112.75,-8.84,91.36,35.13,253.43,20.68,200.01,-0.87,96.35,,67.14,55.54,32.8,198.7,148.08,46.77,105.38,94.28,72.85,238.36,185.24,84.86,108.51,97.32,75.87,236.83,185.51,90.26,83.71,71.84,50.09,213.98,161.51,62.26
2259,2020-03-09,-39.42,89.82,-45.68,78.9,-53.72,56.97,-0.97,223.39,-14.98,169.98,-26.78,71.12,,72.92,61.36,38.93,205.07,153.55,52.12,96.64,85.38,63.76,229.69,176.65,75.77,108.51,97.32,75.87,236.83,185.51,90.26,84.46,72.78,50.97,216.03,163.37,63.13
2260,2020-03-10,-10.44,85.26,-16.93,73.8,-24.35,51.92,24.03,218.22,10.55,165.38,-8.53,64.07,,85.26,73.8,51.92,218.22,165.38,64.07,85.26,73.8,51.92,218.22,165.38,64.07,108.51,97.32,75.87,236.83,185.51,90.26,85.26,73.8,51.92,218.22,165.38,64.07


In [12]:
# harmed_forecast.columns

In [13]:
# harmed_prophet.changepoints[:5]

In [14]:
# exporting harmed_forecast predictions to assets repository for d3.js visualization
#
# Adds to d3_data, in this order: num_incidents (left-joined by date), an
# 'index' column holding the row position, non_observation, year and nyd, then
# writes the frame to CSV.
num_incidents_df = pd.read_pickle('Pickles/df_date_2.pkl')[['date', 'num_incidents']]
d3_data = pd.merge(d3_data, num_incidents_df, on = 'date', how='left')
# reset_index() materializes the row position as an 'index' column, which is
# written to the CSV as well.
d3_data = d3_data.reset_index()

# non_observation is 1 from the earliest date without an observed num_harmed
# onward and empty before it. If every row has an observation, the column is
# still created (all empty) so the CSV layout stays the same.
first_unobserved_date = d3_data.loc[d3_data['num_harmed'].isnull(), 'date'].min()
d3_data['non_observation'] = np.nan
if pd.notnull(first_unobserved_date):
    first_unobserved_row = d3_data.index[d3_data['date'] == first_unobserved_date][0]
    d3_data.loc[d3_data['index'] >= first_unobserved_row, 'non_observation'] = 1
d3_data['year'] = d3_data['date'].dt.year

# nyd numbers the January 1st rows 0, 1, 2, ... in row order; all other rows
# stay empty.
is_new_years_day = (d3_data['date'].dt.day == 1) & (d3_data['date'].dt.month == 1)
d3_data['nyd'] = (is_new_years_day.cumsum() - 1).where(is_new_years_day)

d3_data.to_csv('../cyaris.github.io/assets/us_harmed_victim_forecast_data.csv')

In [15]:
# List the columns of the exported frame to confirm what the CSV contains.
d3_data.columns

Index(['index', 'date', 'yearly_trend_calc_2019', 'pred_2019_smooth_0',
       'yearly_trend_calc_2018', 'pred_2018_smooth_0',
       'yearly_trend_calc_2017', 'pred_2017_smooth_0',
       'yearly_trend_calc_2016', 'pred_2016_smooth_0',
       'yearly_trend_calc_2015', 'pred_2015_smooth_0',
       'yearly_trend_calc_2014', 'pred_2014_smooth_0', 'num_harmed',
       'pred_2019_smooth_5', 'pred_2018_smooth_5', 'pred_2017_smooth_5',
       'pred_2016_smooth_5', 'pred_2015_smooth_5', 'pred_2014_smooth_5',
       'pred_2019_smooth_10', 'pred_2018_smooth_10', 'pred_2017_smooth_10',
       'pred_2016_smooth_10', 'pred_2015_smooth_10', 'pred_2014_smooth_10',
       'pred_2019_smooth_15', 'pred_2018_smooth_15', 'pred_2017_smooth_15',
       'pred_2016_smooth_15', 'pred_2015_smooth_15', 'pred_2014_smooth_15',
       'pred_2019_smooth_20', 'pred_2018_smooth_20', 'pred_2017_smooth_20',
       'pred_2016_smooth_20', 'pred_2015_smooth_20', 'pred_2014_smooth_20',
       'num_incidents', 'non_observat

In [16]:
# Inspect the first rows of the exported frame (observed dates, where
# num_harmed and num_incidents are populated).
d3_data.head(3)

Unnamed: 0,index,date,yearly_trend_calc_2019,pred_2019_smooth_0,yearly_trend_calc_2018,pred_2018_smooth_0,yearly_trend_calc_2017,pred_2017_smooth_0,yearly_trend_calc_2016,pred_2016_smooth_0,yearly_trend_calc_2015,pred_2015_smooth_0,yearly_trend_calc_2014,pred_2014_smooth_0,num_harmed,pred_2019_smooth_5,pred_2018_smooth_5,pred_2017_smooth_5,pred_2016_smooth_5,pred_2015_smooth_5,pred_2014_smooth_5,pred_2019_smooth_10,pred_2018_smooth_10,pred_2017_smooth_10,pred_2016_smooth_10,pred_2015_smooth_10,pred_2014_smooth_10,pred_2019_smooth_15,pred_2018_smooth_15,pred_2017_smooth_15,pred_2016_smooth_15,pred_2015_smooth_15,pred_2014_smooth_15,pred_2019_smooth_20,pred_2018_smooth_20,pred_2017_smooth_20,pred_2016_smooth_20,pred_2015_smooth_20,pred_2014_smooth_20,num_incidents,non_observation,year,nyd
0,0,2014-01-01,,89.33,,88.02,,90.28,,91.77,,95.39,,96.32,182.0,89.33,88.02,90.28,91.77,95.39,96.32,89.33,88.02,90.28,91.77,95.39,96.32,89.33,88.02,90.28,91.77,95.39,96.32,89.33,88.02,90.28,91.77,95.39,96.32,198.0,,2014,0.0
1,1,2014-01-02,,84.76,,82.97,,84.53,,86.25,,90.31,,90.01,77.0,82.23,80.81,82.89,84.0,87.05,88.48,98.3,97.13,99.32,101.07,104.15,105.79,88.43,87.1,89.04,90.31,94.27,94.73,89.27,88.25,90.4,91.8,95.76,96.1,107.0,,2014,
2,2,2014-01-03,,90.28,,89.06,,91.09,,92.82,,96.05,,99.95,96.0,80.04,78.6,80.59,81.46,83.95,85.83,105.49,104.44,106.56,108.48,111.11,113.29,87.51,86.19,87.82,88.9,93.15,93.21,89.12,88.37,90.41,91.71,95.95,95.78,120.0,,2014,
