# <img src = "./resources/GA.png" width = "25" height = "25" /> <span style = "color:Purple" > Project 5 : Food Insecurity Regression Study </span> 
---
## <span style = "color:Green" > Multivariate Time Series Modeling </span>      

#### Ira Seidman, Alec Edgecliffe-Johnson, Ryan McDonald, Andrew Roberts - General Assembly 
---

### Notebook Contents:

- [Reading the Data](#intro)
- [Model with VAR](#var)
- [Data Preparation](#prep)


**Imports**

In [1]:
# Data manipulation imports
import pandas as pd
import numpy as np

# Plotting imports
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.vector_ar.var_model import VAR

<a id='intro'></a>
## 1. Reading in The Data

In [2]:
data = pd.read_csv('./data/time_series/pov_fi_10_yr_c.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,time,state,pov,fi
0,1.0,1/1/2010,Alabama,18.9,20.379104
1,2.0,1/1/2011,Alabama,18.9,19.292537
2,3.0,1/1/2012,Alabama,21.3,19.497412
3,4.0,1/1/2013,Alabama,23.8,18.569592
4,5.0,1/1/2014,Alabama,20.6,18.800626


In [3]:
data = data.drop(columns = ['Unnamed: 0'])
data.head()

Unnamed: 0,time,state,pov,fi
0,1/1/2010,Alabama,18.9,20.379104
1,1/1/2011,Alabama,18.9,19.292537
2,1/1/2012,Alabama,21.3,19.497412
3,1/1/2013,Alabama,23.8,18.569592
4,1/1/2014,Alabama,20.6,18.800626


In [4]:
data.dtypes

time      object
state     object
pov      float64
fi       float64
dtype: object

In [5]:
data['time'] = pd.to_datetime(data['time'])
data.head()

Unnamed: 0,time,state,pov,fi
0,2010-01-01,Alabama,18.9,20.379104
1,2011-01-01,Alabama,18.9,19.292537
2,2012-01-01,Alabama,21.3,19.497412
3,2013-01-01,Alabama,23.8,18.569592
4,2014-01-01,Alabama,20.6,18.800626


<a id='var'></a>
## 2. Modeling with Var

**Plots developed, saved to CSV and added to Tableau/Streamlit**

In [6]:
# Code and methodology developed with help from our instructor Adi and https://www.analyticsvidhya.com/blog/2018/09/multivariate-time-series-guide-forecasting-modeling-python-codes/ and from https://www.youtube.com/watch?v=sCl6CXZ2xBg

# Creating a full states list:
states = ['Alabama','Alaska','Arizona','Arkansas','California','Colorado',
          'Connecticut','District of Columbia', 'Delaware','Florida','Georgia','Hawaii','Idaho',
          'Illinois', 'Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Maryland',
          'Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana',
          'Nebraska','Nevada','New Hampshire','New Jersey','New Mexico','New York',
          'North Carolina','North Dakota','Ohio','Oklahoma','Oregon','Pennsylvania',
          'Rhode Island','South Carolina','South Dakota','Tennessee','Texas','Utah',
          'Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']

# Loop through all states
for s in states:
    # Training data
    train_data = data[data['state'] == s]
    train_data = train_data[['time', 'fi', 'pov']]
    train_data = train_data.drop(train_data.tail(2).index)
    
    # Validation data
    valid_data = data[data['state'] == s]
    valid_data = valid_data[['time', 'fi', 'pov']]
    valid_data = valid_data.drop(valid_data.head(7).index)
    
    # All data
    all_data = data[data['state'] == s]
    all_data = all_data[['time', 'fi', 'pov']]
    
    # Set time column to index
    train_data.set_index('time', inplace = True)
    valid_data.set_index('time', inplace = True)
    all_data.set_index('time', inplace = True)
    
    # Set valid index for 7 years
    index_7_years = pd.date_range(train_data.index[-1], freq = 'AS', periods = 7)
    
    # Future index - 7 years
    future_7_years = pd.date_range(valid_data.index[-1], freq = 'AS', periods = 7)
    
    # Reset forecasts betwee states
    
    # Drop all tables:
#     try:
#         del t_fcast1
#         del f_fcast1
        
#     except:
#         print('')
    
##################################################################################################################
################################################ VAR #########################################################
##################################################################################################################

################################################  VAR Validation Phase ###################################################

    model = VAR(endog = train_data, freq='AS-JAN')
    model_fit = model.fit()

    # make prediction on validation
    t_fcast1 = model_fit.forecast(model_fit.y, steps = 7)
    t_fcast1 = pd.DataFrame(t_fcast1, index = index_7_years)

################################################  VAR Future Phase #######################################################
    
    model2 = VAR(endog = all_data, freq='AS-JAN')
    model2_fit = model.fit()
        
    f_fcast1 = model2_fit.forecast(model2_fit.y, steps=7)
        
    f_fcast1 = pd.DataFrame(f_fcast1, index=future_7_years)
        
##################################################################################################################
################################################ Plotting ######################################################
##################################################################################################################

# PLOTS PREVIOUSLY DEVELOPED AND ADDED TO TABLEAU/STREAMLIT!!

################################################  DF VAR #######################################################
     
#     try:
#         # Creating df for forecast
    t_fcast1 = t_fcast1.reset_index()
    t_fcast1.columns = ['Year', "VAR FI", 'VAR POV']
    
    f_fcast1 = f_fcast1.reset_index()
    f_fcast1.columns = ['Year', 'VAR Future FI', 'VAR Future Pov']
        
    #Extra Columns
    t_fcast1[['VAR Future FI', 'VAR Future Pov']] = np.nan
    f_fcast1[["VAR FI", 'VAR POV']] = np.nan
        
    # Reordering
    t_fcast1 = t_fcast1[['Year', 'VAR Future FI', 'VAR Future Pov', 'VAR FI', 'VAR POV']]
        
    # Joining them togther
    df_fcast1 = pd.concat([t_fcast1, f_fcast1], axis = 0)
    df_fcast1['State'] = s

################################################ Aggregating and Joining #######################################################

    df_fcast1 = df_fcast1.groupby(['Year', 'State'], as_index = False).agg({'VAR Future FI': 'sum', 'VAR Future Pov': 'sum'})   
        
    all_forecasts = df_fcast1.copy()

    # Save predictions in df. First time this will fail and just give all_forecasts, after that will concat for each state
    try: 
        final_forecasts = pd.concat([final_forecasts, all_forecasts], ignore_index = True).copy()
    except:
        final_forecasts = all_forecasts.copy()


  obj = getattr(results, attr)


<a id='prep'></a>
## 3. Preparing Original Data and Forecast Data for Concatenation

In [7]:
data.head()

Unnamed: 0,time,state,pov,fi
0,2010-01-01,Alabama,18.9,20.379104
1,2011-01-01,Alabama,18.9,19.292537
2,2012-01-01,Alabama,21.3,19.497412
3,2013-01-01,Alabama,23.8,18.569592
4,2014-01-01,Alabama,20.6,18.800626


In [8]:
# Renaming columns
final_forecasts.rename(columns = {'VAR Future FI': 'pov',
                                  'VAR Future Pov': 'fi',
                                  'State': 'state',
                                  'Year': 'year'
                                 }, inplace = True)

data.rename(columns = {'time' : 'year'}, inplace = True)
final_forecasts.head()

Unnamed: 0,year,state,pov,fi
0,2017-01-01,Alabama,0.0,0.0
1,2018-01-01,Alabama,0.0,0.0
2,2019-01-01,Alabama,18.48537,21.869745
3,2020-01-01,Alabama,18.557806,21.571658
4,2021-01-01,Alabama,18.602132,21.542153


In [9]:
# Dropping 0s
final_forecasts['pov'][final_forecasts['pov'] == 0] = np.nan
final_forecasts['fi'][final_forecasts['fi'] == 0] = np.nan
final_forecasts.dropna(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_forecasts['pov'][final_forecasts['pov'] == 0] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_forecasts['fi'][final_forecasts['fi'] == 0] = np.nan


In [10]:
# Dropping 2019 in forecasts as we have it in the original
final_forecasts.drop(final_forecasts[(final_forecasts['year'] == '2019-01-01')].index, inplace = True)

In [11]:
# Concatenating and dropping NA
output_df = pd.concat([data, final_forecasts], axis = 0)

output_df.dropna(inplace = True)

In [12]:
output_df.head()

Unnamed: 0,year,state,pov,fi
0,2010-01-01,Alabama,18.9,20.379104
1,2011-01-01,Alabama,18.9,19.292537
2,2012-01-01,Alabama,21.3,19.497412
3,2013-01-01,Alabama,23.8,18.569592
4,2014-01-01,Alabama,20.6,18.800626


In [13]:
# Exporting to CSV for use in tableau
output_df.to_csv('./data/time_series/var_model_preds.csv')