# 3 Modeling<a id='5_Modeling'></a>

## 3.1 Contents<a id='5.1_Contents'></a>


## 3.2 Introduction<a id='5.2_Introduction'></a>

In [149]:
import pandas as pd
import numpy as np
import os
import pickle
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import __version__ as sklearn_version
from sklearn.model_selection import cross_validate

## 3.3 Load Model<a id='5.4_Load_Model'></a>

In [64]:
expected_model_version = '1.0'
model_path = '../models/ski_resort_pricing_model.pkl'
if os.path.exists(model_path):
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    if model.version != expected_model_version:
        print("Expected model version doesn't match version loaded")
    if model.sklearn_version != sklearn_version: 
        print("Warning: model created under different sklearn version")
else:
    print("Expected model not found")

## 3.4 Load Data<a id='5.5_Load_Data'></a>

In [243]:
stocks_data_daily = pd.read_csv('../data/SNP_500_2020_cor90.csv',index_col=0)

## 3.5 Refit Model On All Available Data <a id='5.6_Refit_Model_On_All_Available_Data_(excluding_Big_Mountain)'></a>

In [66]:
X = stocks_data_daily.drop(columns='AAPL_Highest price')
y = stocks_data_daily['AAPL_Highest price']

In [67]:
model.fit(X, y)

Pipeline(memory=None,
         steps=[('simpleimputer',
                 SimpleImputer(add_indicator=False, copy=True, fill_value=None,
                               missing_values=nan, strategy='median',
                               verbose=0)),
                ('standardscaler', None),
                ('randomforestregressor',
                 RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                       criterion='mse', max_depth=None,
                                       max_features='auto', max_leaf_nodes=None,
                                       max_samples=None,
                                       min_impurity_decrease=0.0,
                                       min_impurity_split=None,
                                       min_samples_leaf=1, min_samples_split=2,
                                       min_weight_fraction_leaf=0.0,
                                       n_estimators=1000, n_jobs=None,
                                    

In [244]:
cv_results = cross_validate(model, X, y, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)

In [245]:
cv_results['test_score']

array([-2.40897844, -3.12895184, -3.83355941, -4.69253264, -4.87125613])

In [246]:
mae_mean, mae_std = np.mean(-1 * cv_results['test_score']), np.std(-1 * cv_results['test_score'])
mae_mean, mae_std 

(3.787055689979927, 0.9305631925504458)

## 3.6 Calculate Expected Apple Stock Price Based on the Model<a id='5.7_Calculate_Expected_Big_Mountain_Ticket_Price_From_The_Model'></a>

In [247]:
stocks_data_daily_test = stocks_data_daily.copy()

In [248]:
stocks_data_daily_test.head()

Unnamed: 0,AAPL_Highest price,MSFT_Highest price,AMZN_Highest price,FB_Highest price,TSLA_Highest price,NVDA_Highest price,PG_Highest price,PYPL_Highest price,ADBE_Highest price,TMO_Highest price,...,POOL_Highest price,GWW_Highest price,TTWO_Highest price,ABMD_Highest price,JBHT_Highest price,MAS_Highest price,FBHS_Highest price,CHRW_Highest price,PENN_Highest price,ROL_Highest price
0,75.15,160.73,1898.01,209.79,86.1391,59.9775,124.73,111.21,334.48,326.94,...,213.88,344.71,123.98,173.86,117.99,48.41,65.81,78.5,26.15,22.3933
1,75.145,159.945,1886.1965,210.4,90.8,59.4575,123.53,110.42,332.98,325.29,...,215.68,342.26,122.5,169.3174,117.21,47.92,65.5,77.8,25.965,22.1733
2,74.99,159.1,1903.69,212.78,90.312,59.3175,123.19,110.22,333.91,325.33,...,215.34,339.85,125.1264,179.42,116.1998,47.625,65.89,77.62,26.33,22.1933
3,75.225,159.67,1913.89,214.58,94.326,60.4425,123.2059,111.56,334.79,329.24,...,214.68,339.3,126.47,182.44,117.86,47.54,65.9009,78.44,26.48,22.2667
4,76.11,160.8,1910.9999,216.24,99.698,60.51,123.4299,112.64,339.23,330.59,...,215.27,343.11,128.995,180.73,119.41,47.93,67.13,79.69,27.075,22.2267


In [42]:
X_bm = stocks_data_daily_test.drop(columns='AAPL_Highest price')

In [51]:
Y_bm_ = stocks_data_daily_test['AAPL_Highest price']

In [52]:
Y_bm = Y_bm_.values.item()

In [44]:
bm_pred = model.predict(X_bm).item()

In [249]:
print(f'Apple Stock modelled price is ${bm_pred:.2f}, actual price is ${Y_bm:.2f}.')
print(f'Even with the expected mean absolute error of ${mae_mean:.2f}, this suggests there is room for an increase.')

Apple Stock modelled price is $136.31, actual price is $135.77.
Even with the expected mean absolute error of $3.79, this suggests there is room for an increase.


# Test month

In [218]:
stocks_data_daily_test_month = stocks_data("2021-03-01", "2021-03-30",tickers)

In [219]:
stocks_data_daily_test_month.head()

Unnamed: 0_level_0,AAPL_Highest price,MSFT_Highest price,AMZN_Highest price,GOOGL_Highest price,FB_Highest price,GOOG_Highest price,TSLA_Highest price,NVDA_Highest price,BRK.B_Highest price,JPM_Highest price,...,MO_Highest price,GILD_Highest price,LMT_Highest price,TJX_Highest price,PNC_Highest price,LRCX_Highest price,ADP_Highest price,MU_Highest price,MDLZ_Highest price,USB_Highest price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-28 21:00,127.93,237.47,3149.56,2076.23,266.65,2086.52,872.0,139.25,250.55,151.3799,...,45.155,62.63,337.72,67.85,174.48,600.5,178.455,95.75,54.17,51.545
2021-03-01 21:00,128.72,237.3,3163.52,2094.74,266.71,2104.37,721.11,139.205,251.34,151.78,...,45.13,63.59,340.01,67.67,177.81,598.76,177.99,94.9,54.1501,51.405
2021-03-02 21:00,125.71,233.5799,3107.775,2075.99,260.9899,2088.518,700.7,134.515,254.81,154.98,...,45.635,63.32,346.3312,66.02,176.87,581.58,175.09,93.33,53.61,52.18
2021-03-03 21:00,123.6,232.49,3058.13,2074.59,266.49,2089.24,873.94,129.75,252.55,154.3786,...,45.55,64.54,344.35,64.45,174.87,559.98,175.87,89.75,54.93,51.53
2021-03-04 21:00,121.935,233.27,3009.0,2106.2,265.45,2118.11,627.8419,125.5,253.45,153.49,...,45.3,64.745,341.17,63.21,176.9,550.12,179.0,89.32,55.91,52.685


In [220]:
stocks_data_daily_test_month.loc[:stocks_data_daily_test_month.index[4]] 

Unnamed: 0_level_0,AAPL_Highest price,MSFT_Highest price,AMZN_Highest price,GOOGL_Highest price,FB_Highest price,GOOG_Highest price,TSLA_Highest price,NVDA_Highest price,BRK.B_Highest price,JPM_Highest price,...,MO_Highest price,GILD_Highest price,LMT_Highest price,TJX_Highest price,PNC_Highest price,LRCX_Highest price,ADP_Highest price,MU_Highest price,MDLZ_Highest price,USB_Highest price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-28 21:00,127.93,237.47,3149.56,2076.23,266.65,2086.52,872.0,139.25,250.55,151.3799,...,45.155,62.63,337.72,67.85,174.48,600.5,178.455,95.75,54.17,51.545
2021-03-01 21:00,128.72,237.3,3163.52,2094.74,266.71,2104.37,721.11,139.205,251.34,151.78,...,45.13,63.59,340.01,67.67,177.81,598.76,177.99,94.9,54.1501,51.405
2021-03-02 21:00,125.71,233.5799,3107.775,2075.99,260.9899,2088.518,700.7,134.515,254.81,154.98,...,45.635,63.32,346.3312,66.02,176.87,581.58,175.09,93.33,53.61,52.18
2021-03-03 21:00,123.6,232.49,3058.13,2074.59,266.49,2089.24,873.94,129.75,252.55,154.3786,...,45.55,64.54,344.35,64.45,174.87,559.98,175.87,89.75,54.93,51.53
2021-03-04 21:00,121.935,233.27,3009.0,2106.2,265.45,2118.11,627.8419,125.5,253.45,153.49,...,45.3,64.745,341.17,63.21,176.9,550.12,179.0,89.32,55.91,52.685


In [239]:
stocks_data_daily_test_month.iloc[0].to_frame().T

Unnamed: 0,AAPL_Highest price,MSFT_Highest price,AMZN_Highest price,GOOGL_Highest price,FB_Highest price,GOOG_Highest price,TSLA_Highest price,NVDA_Highest price,BRK.B_Highest price,JPM_Highest price,...,MO_Highest price,GILD_Highest price,LMT_Highest price,TJX_Highest price,PNC_Highest price,LRCX_Highest price,ADP_Highest price,MU_Highest price,MDLZ_Highest price,USB_Highest price
2021-02-28 21:00,127.93,237.47,3149.56,2076.23,266.65,2086.52,872.0,139.25,250.55,151.3799,...,45.155,62.63,337.72,67.85,174.48,600.5,178.455,95.75,54.17,51.545


In [223]:
stocks_data_daily_test_month.index

Index(['2021-02-28 21:00', '2021-03-01 21:00', '2021-03-02 21:00',
       '2021-03-03 21:00', '2021-03-04 21:00', '2021-03-07 21:00',
       '2021-03-08 21:00', '2021-03-09 21:00', '2021-03-10 21:00',
       '2021-03-11 21:00', '2021-03-14 21:00', '2021-03-15 21:00',
       '2021-03-16 21:00', '2021-03-17 21:00', '2021-03-18 21:00',
       '2021-03-21 21:00', '2021-03-22 21:00', '2021-03-23 21:00',
       '2021-03-24 21:00', '2021-03-25 21:00', '2021-03-28 21:00',
       '2021-03-29 21:00'],
      dtype='object', name='Date')

In [240]:
def model_test(df):
    df.drop(columns='AAPL_Highest price')
    y_bm = df['AAPL_Highest price']
    forcast_df = []
    for row in range(len(df)):
        bm_pred = round(model.predict(stocks_data_daily_test_month.iloc[row].to_frame().T).item(),2)
        actual = round(y_bm[row],2)
        #date = df.index[row]
        #print(date)
        forcast_df.append([actual,bm_pred])
    return pd.DataFrame(data= forcast_df, columns=['Actual', 'Predicted'])

In [241]:
model_test(stocks_data_daily_test_month)

Unnamed: 0,Actual,Predicted
0,127.93,125.8
1,128.72,128.04
2,125.71,124.88
3,123.6,123.84
4,121.94,123.68
5,121.0,125.8
6,122.06,127.82
7,122.17,127.92
8,123.21,128.65
9,121.17,128.56


In [2]:
! ls

1_Data Wrangling.ipynb                  3_Modeling.ipynb
2_Predicting_Apple_Stock_S&P 500_.ipynb [34mlibrary[m[m
