# Structure of notebook
 > Basic prophet model on top 50 zipcodes <br>
 > How we chose the top 5 <br>
 > Optimize parameters for the top 5 zipcodes using grid search<br>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import ParameterGrid

In [None]:
# Load the Zillow CSV and parse the time column as datetimes.
df = pd.read_csv('../../src/data/zillow_data_lf.csv',)
df.time = pd.to_datetime(df.time)
# Drop columns that aren't needed for modeling.
df.drop(['RegionID','SizeRank'], axis = 1, inplace = True)
# Keep only observations from 2010 onward.
df = df[pd.DatetimeIndex(df['time']).year >2009]
df.head()

# Modeling with prophet

In [None]:
def Prophet_func2(df, zipcodes, period, size_train):
    """Fit a default Prophet model per zipcode and summarise each forecast.

    For every zipcode the rows are split chronologically (by position) into
    a training part and a test part, a Prophet model is fitted on the
    training part, and a forecast is produced that covers the test part
    plus ``period`` further month-start steps.

    Args:
        df: DataFrame with at least the columns ``RegionName``, ``time``
            and ``value``.
        zipcodes: Iterable of ``RegionName`` values to model.
        period: Number of month-start periods to forecast beyond the end
            of the test part.
        size_train: Fraction of each zipcode's rows used for training.

    Returns:
        DataFrame with one row per zipcode and the columns ``Zipcode``,
        ``Home_Value`` (last observed value), ``Future_Home_Value`` (last
        forecast value rounded to the nearest hundred), ``Percent_Increase``
        (truncated to an integer) and ``RMSE`` (test-set RMSE divided by the
        mean training value, kept as a float).

    Raises:
        ValueError: If a zipcode has no rows in ``df``.
    """
    columns = ['Zipcode', 'Home_Value', 'Future_Home_Value',
               'Percent_Increase', 'RMSE']
    rows = []

    for zipcode in zipcodes:
        # Prophet expects the time column to be named ds and the target y.
        data = df.loc[df['RegionName'] == zipcode, ['time', 'value']].rename(
            columns={'time': 'ds', 'value': 'y'})
        if data.empty:
            raise ValueError(
                'No rows found for zipcode {!r}'.format(zipcode))

        # Positional split: the first size_train share of rows is training.
        eotindex = round(data.shape[0] * size_train)
        train = data.iloc[:eotindex]
        test = data.iloc[eotindex:]
        train_size = train.shape[0]
        test_size = test.shape[0]

        value_now = int(data['y'].iloc[-1])

        model = Prophet(daily_seasonality=False,
                        weekly_seasonality=False,
                        interval_width=0.95)
        model.fit(train)
        future = model.make_future_dataframe(periods=period + test_size,
                                             freq='MS')
        forecast = model.predict(future)

        value_future = round(forecast['yhat'].iloc[-1], -2)
        rate_5_yrs = 100 * ((value_future - value_now) / value_now)

        # Score only the forecast rows that line up with the test part and
        # normalise by the mean training value so zipcodes are comparable.
        predicted = forecast['yhat'].iloc[train_size:train_size + test_size]
        root_mse = (np.sqrt(mean_squared_error(test['y'], predicted))
                    / train['y'].mean())

        rows.append({'Zipcode': zipcode,
                     'Home_Value': value_now,
                     'Future_Home_Value': value_future,
                     'Percent_Increase': rate_5_yrs,
                     'RMSE': root_mse})

    df_info = pd.DataFrame(rows, columns=columns)
    # RMSE is a ratio (normally below 1), so it must stay a float; casting
    # the whole frame to int64 would truncate it to 0.
    int_columns = ['Zipcode', 'Home_Value', 'Future_Home_Value',
                   'Percent_Increase']
    return df_info.astype({name: 'int64' for name in int_columns})

In [None]:
# Zipcodes to model with the baseline Prophet settings.
zipcodes = [60089, 60565, 46321, 60564, 46304]
# Number of month-start periods to forecast past the end of the test data.
period = 60
# Fraction of each zipcode's rows used for training; the rest is the test set.
size_train = .8

# Display the per-zipcode summary table.
Prophet_func2(df, zipcodes, period, size_train)

# Optimizing Prophet

In [None]:
def optimized_params(df, zipcode, size_train):
    """Grid-search Prophet hyperparameters for one zipcode.

    The zipcode's rows are split chronologically (by position) into a
    training part and a test part. One Prophet model is fitted on the
    training part for every combination in the grid and scored by RMSE on
    the test part.

    Args:
        df: DataFrame with at least the columns ``RegionName``, ``time``
            and ``value``.
        zipcode: ``RegionName`` value to tune for.
        size_train: Fraction of the zipcode's rows used for training.

    Returns:
        DataFrame with one row per grid combination and the columns
        ``RMSE`` (test-set root mean squared error) and ``Parameters``
        (the dict of parameter values used for that fit).

    Raises:
        ValueError: If ``zipcode`` has no rows in ``df``.
    """
    # Prophet expects the time column to be named ds and the target y.
    data = df.loc[df['RegionName'] == zipcode, ['time', 'value']].rename(
        columns={'time': 'ds', 'value': 'y'})
    if data.empty:
        raise ValueError('No rows found for zipcode {!r}'.format(zipcode))

    # Positional split: the first size_train share of rows is training.
    eotindex = round(data.shape[0] * size_train)
    train = data.iloc[:eotindex]
    test = data.iloc[eotindex:]
    train_size = train.shape[0]
    test_size = test.shape[0]

    params_grid = {'seasonality_mode': ('multiplicative', 'additive'),
                   'changepoint_prior_scale': [0.1, 0.3, .05],
                   'n_changepoints': [5, 10, 20]}

    results = []
    for p in ParameterGrid(params_grid):
        model = Prophet(changepoint_prior_scale=p['changepoint_prior_scale'],
                        n_changepoints=p['n_changepoints'],
                        seasonality_mode=p['seasonality_mode'],
                        weekly_seasonality=False,
                        daily_seasonality=False,
                        yearly_seasonality=True,
                        interval_width=0.95)
        model.fit(train)
        # Only the test window is scored, so forecast no further than that.
        future = model.make_future_dataframe(periods=test_size, freq='MS')
        forecast = model.predict(future)

        predicted = forecast['yhat'].iloc[train_size:train_size + test_size]
        root_mse = np.sqrt(mean_squared_error(test['y'], predicted))
        results.append({'RMSE': root_mse, 'Parameters': p})

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(results, columns=['RMSE', 'Parameters'])

In [None]:
# Fraction of the zipcode's rows used for training during the grid search.
size_train = .8
# Zipcode to tune; None would select no rows. 30331 matches the result name
# and the zipcode used in the visualization cells.
zipcode = 30331
df_30331 = optimized_params(df, zipcode, size_train)

In [None]:
# Rank the grid-search results by RMSE (best first) and show the winning
# parameter set. The results live in df_30331; model_parameters only exists
# inside optimized_params.
parameters = df_30331.sort_values(by=['RMSE']).reset_index(drop=True)
parameters['Parameters'][0]

# Visualizations of Model Using Optimized Params

In [None]:
# Select zipcode 30331 and rename the columns to the ds / y names Prophet expects.
data= df[df['RegionName']== 30331]
data = data[['time', 'value']]
data.columns = ['ds','y']

# Positional split: the first 80% of rows are training, the rest are test.
eotindex = round(data.shape[0] * .8)
train = data[:eotindex]
test = data[eotindex:]

train_size = train.shape[0]
test_size = test.shape[0]
# Month-start frequency and the number of periods to forecast past the test data.
freq = 'MS'
period = 60

In [None]:
# Fit the final Prophet model on the training rows.
# NOTE(review): changepoint_prior_scale=.5 is not one of the values searched in
# optimized_params ([0.1, 0.3, .05]), and interval_width is 0.8 here versus
# 0.95 in the search — confirm these are the intended tuned settings.
model = Prophet(changepoint_prior_scale = .5,
                         n_changepoints = 20,
                         seasonality_mode = 'additive',
                         weekly_seasonality=False,
                         daily_seasonality = False,
                         yearly_seasonality = True,
                         interval_width=0.8)
model.fit(train)
# Forecast across the test window plus `period` additional steps.
future = model.make_future_dataframe(periods = period + test_size, freq = freq)
forecast = model.predict(future)

In [None]:
pd.plotting.register_matplotlib_converters()
# Overlay the train and test series with the forecast for the test window.
f, ax = plt.subplots(figsize=(14,5))
train.plot(kind='line', x='ds', y='y', color='blue', label='Train', ax=ax)
test.plot(kind='line', x='ds', y='y', color='red', label='test', ax=ax)
# .loc slicing is label-based and inclusive; this assumes forecast carries a
# default 0..n-1 index so the labels line up with the test rows.
forecast.loc[train_size:data.shape[0]-1].plot(kind='line', x='ds', y='yhat', color='Black', label='Predicted', ax=ax)
plt.title('Sales Amount Training and Test data')
plt.show()

In [None]:
# RMSE of the final model over the forecast rows that correspond to the test set.
root_mse = np.sqrt(mean_squared_error(test.y,forecast.yhat[train_size:test_size+train_size]))
root_mse