In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from copy import deepcopy

In [2]:
from warnings import filterwarnings

# Silence every warning category for the rest of the session so cell outputs stay compact.
filterwarnings(action='ignore')

In [3]:
# Display settings: floats rendered with two decimals, and no cap on the
# number of columns shown for wide frames.
pd.options.display.float_format = lambda value: '%.2f' % value
pd.options.display.max_columns = None
# pd.set_option('display.max_colwidth', 1000)

In [4]:
# Load the pickled frame and reduce it to the date plus a combined victim count.
# num_harmed = killed + injured, i.e. victims not including those "unharmed".
df_date = (
    pd.read_pickle('Pickles/df_date_2.pkl')
    .assign(num_harmed=lambda frame: frame['num_killed'] + frame['num_injured'])
    [['date', 'num_harmed']]
)

In [5]:
# Every column must report exactly zero missing values.
null_counts_by_column = df_date.isnull().sum()
assert set(null_counts_by_column.tolist()) == {0}

In [6]:
# Number of rows per calendar year.
observed_years = df_date['date'].dt.year
observed_years.value_counts()

2016    366
2018    365
2017    365
2015    365
2014    365
2019     70
Name: date, dtype: int64

In [7]:
# Forecast horizon, in days, for the model trained on data up to and including
# each year: every observed day after that year (first date of the following
# year through the last date in df_date), plus a fixed extension past the last
# observation.
DAYS_BEYOND_LAST_OBSERVATION = 365

last_observed_date = df_date['date'].max()
future_dataframe_lengths = {}
for year in df_date['date'].dt.year.unique():
    dates_in_following_year = df_date.loc[df_date['date'].dt.year == year + 1, 'date']
    if dates_in_following_year.empty:
        # No rows for year + 1 (always the case for the final year): only the
        # fixed extension remains. Checked explicitly instead of relying on a
        # bare `except`, which would also hide unrelated errors.
        future_dataframe_lengths[year] = DAYS_BEYOND_LAST_OBSERVATION
    else:
        held_out_days = len(pd.period_range(dates_in_following_year.min(), last_observed_date))
        future_dataframe_lengths[year] = held_out_days + DAYS_BEYOND_LAST_OBSERVATION

print(future_dataframe_lengths)

{2014: 1896, 2015: 1531, 2016: 1165, 2017: 800, 2018: 435, 2019: 365}


In [8]:
# Fit one Prophet model per cutoff year (training on all rows up to and
# including that year), forecast future_dataframe_lengths[year] days past the
# training data, and collect each model's predictions next to the observed
# values in d3_data.

# Lag, in rows, used for the year-over-year change in predictions. The lag is
# fixed, so across a leap year (2016 has 366 rows) the compared dates are one
# day apart.
DAYS_PER_YEAR = 365

# The frame is fitted with `ds`/`y` column names. Rename once into a separate
# frame so df_date keeps its own column names even if a fit fails part-way
# through the loop.
prophet_input = df_date.rename(columns={'date': 'ds', 'num_harmed': 'y'})

# Start from the observations; every iteration prepends that year's columns.
d3_data = df_date.copy()
for year in df_date['date'].dt.year.unique():
    pred_column = 'pred_' + str(year)
    trend_column = 'yearly_trend_calc_' + str(year)

    # NOTE(review): the data looks like one row per day; if so, daily_seasonality
    # has no within-day pattern to fit — confirm it is intended.
    harmed_prophet = Prophet(changepoint_prior_scale=0.5, daily_seasonality=True, yearly_seasonality=True)
    harmed_prophet.fit(prophet_input[prophet_input['ds'].dt.year <= year])

    future_dates = harmed_prophet.make_future_dataframe(periods=future_dataframe_lengths[year], freq='D')
    harmed_forecast = harmed_prophet.predict(future_dates)

    # Change in the prediction relative to the row DAYS_PER_YEAR earlier
    # (NaN for the first DAYS_PER_YEAR rows).
    harmed_forecast[trend_column] = harmed_forecast['yhat'] - harmed_forecast['yhat'].shift(periods=DAYS_PER_YEAR)
    harmed_forecast = harmed_forecast.rename(columns={'ds': 'date', 'yhat': pred_column})

    # Outer join keeps dates that exist only in the forecast (future rows) or
    # only in the accumulated frame. Column order after each merge is
    # date, trend, pred, <everything collected so far>.
    d3_data = pd.merge(harmed_forecast[['date', trend_column, pred_column]], d3_data, on='date', how='outer')

In [9]:
# Row count of the combined observations + forecasts frame.
d3_data.shape[0]

2261

In [10]:
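# Disabled experiment (kept for reference): thin the prediction columns to every
# `rate_value`-th row and re-fill the gaps by cubic interpolation. It selects
# columns containing "smooth_0", which d3_data as built above does not have,
# so it would need updating before it could be re-enabled.
# column_list = ['date', 'num_harmed']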
# for column in d3_data.columns:
#     if "smooth_0" in column:
#         column_list.append(column)
        
# for rate_value in np.arange(5, 25, 5):
#     d3_data_calc = deepcopy(d3_data[column_list])
#     keep_index = list(d3_data_calc.iloc[0::rate_value, :].index.values)
#     for column in d3_data_calc.columns:
#         if 'pred' in column:
#             for i, index_value in enumerate(d3_data_calc.index.values):
#                 if i not in keep_index:
#                     d3_data_calc.loc[i, column] = None
#             d3_data_calc[column] = d3_data_calc[column].interpolate(method = 'cubic')
#             d3_data_calc[column] = d3_data_calc[column].ffill()
#             d3_data_calc.rename({column: str(column)[:-1] + str(rate_value)}, axis = 1, inplace = True)
#     d3_data = pd.merge(d3_data, d3_data_calc, on = ['date', 'num_harmed'])

In [11]:
# Last five rows: dates past the observations, so num_harmed is empty.
d3_data.iloc[-5:]

Unnamed: 0,date,yearly_trend_calc_2019,pred_2019,yearly_trend_calc_2018,pred_2018,yearly_trend_calc_2017,pred_2017,yearly_trend_calc_2016,pred_2016,yearly_trend_calc_2015,pred_2015,yearly_trend_calc_2014,pred_2014,num_harmed
2256,2020-03-06,1.21,83.88,-4.22,73.03,-9.85,55.43,36.44,211.74,23.84,171.3,9.11,66.41,
2257,2020-03-07,23.98,113.51,18.07,102.76,12.33,85.63,58.78,242.82,45.36,200.45,24.48,93.01,
2258,2020-03-08,4.46,123.66,-1.39,113.04,-7.37,96.13,33.98,249.0,22.05,206.2,-1.3,93.77,
2259,2020-03-09,-39.25,90.1,-45.53,79.18,-52.24,61.74,-2.12,218.94,-13.61,176.18,-27.23,68.54,
2260,2020-03-10,-9.81,86.0,-16.29,74.54,-22.29,57.27,23.35,214.22,12.61,172.27,-9.0,61.48,


In [12]:
# harmed_forecast.columns

In [13]:
# harmed_prophet.changepoints[:5]

In [14]:
# exporting harmed_forecast predictions to assets repository for d3.js visualization

# Expose the row position as an `index` column. Guarded so that re-running this
# cell does not stack a second positional column (or raise on a third run).
if 'index' not in d3_data.columns:
    d3_data = d3_data.reset_index()

# Flag every row from the first date without an observed num_harmed onward.
unobserved_dates = d3_data.loc[d3_data['num_harmed'].isnull(), 'date']
if unobserved_dates.empty:
    # Nothing to flag; still create the column so the CSV layout is unchanged.
    d3_data['non_observation'] = np.nan
else:
    first_unobserved_row = d3_data.index[d3_data['date'] == unobserved_dates.min()][0]
    d3_data.loc[d3_data['index'] >= first_unobserved_row, 'non_observation'] = 1
d3_data['year'] = d3_data['date'].dt.year

# Number each January 1st row 0, 1, 2, ... in row order; all other rows stay NaN.
is_new_years_day = (d3_data['date'].dt.day == 1) & (d3_data['date'].dt.month == 1)
d3_data['nyd'] = (is_new_years_day.cumsum() - 1).where(is_new_years_day)

d3_data.to_csv('../cyaris.github.io/assets/us_harmed_victim_forecast_data.csv')

In [15]:
# Column labels of d3_data after the export step.
d3_data.columns

Index(['index', 'date', 'yearly_trend_calc_2019', 'pred_2019',
       'yearly_trend_calc_2018', 'pred_2018', 'yearly_trend_calc_2017',
       'pred_2017', 'yearly_trend_calc_2016', 'pred_2016',
       'yearly_trend_calc_2015', 'pred_2015', 'yearly_trend_calc_2014',
       'pred_2014', 'num_harmed', 'non_observation', 'year', 'nyd'],
      dtype='object')

In [16]:
# First three rows of the exported frame.
d3_data.iloc[:3]

Unnamed: 0,index,date,yearly_trend_calc_2019,pred_2019,yearly_trend_calc_2018,pred_2018,yearly_trend_calc_2017,pred_2017,yearly_trend_calc_2016,pred_2016,yearly_trend_calc_2015,pred_2015,yearly_trend_calc_2014,pred_2014,num_harmed,non_observation,year,nyd
0,0,2014-01-01,,89.42,,87.55,,89.09,,91.39,,95.65,,96.12,182.0,,2014,0.0
1,1,2014-01-02,,84.83,,82.49,,83.37,,85.83,,90.55,,89.84,77.0,,2014,
2,2,2014-01-03,,90.47,,88.72,,90.15,,92.6,,96.25,,99.81,96.0,,2014,
