# 3 Modeling<a id='5_Modeling'></a>

## 3.1 Contents<a id='5.1_Contents'></a>


## 3.2 Introduction<a id='5.2_Introduction'></a>

In [3]:
import pandas as pd
import numpy as np
import os
import pickle
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import __version__ as sklearn_version
from sklearn.model_selection import cross_validate

## 3.3 Load Model<a id='5.4_Load_Model'></a>

In [11]:
expected_model_version = '1.0'
model_path = '../models/ski_resort_pricing_model.pkl'
if os.path.exists(model_path):
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    if model.version != expected_model_version:
        print("Expected model version doesn't match version loaded")
    if model.sklearn_version != sklearn_version: 
        print("Warning: model created under different sklearn version")
else:
    print("Expected model not found")

## 3.4 Load Data<a id='5.5_Load_Data'></a>

In [12]:
stocks_data_daily = pd.read_csv('../data/SNP_500_2020_cor90.csv',index_col=0)

## 3.5 Refit Model On All Available Data <a id='5.6_Refit_Model_On_All_Available_Data_(excluding_Big_Mountain)'></a>

In [13]:
X = stocks_data_daily.drop(columns='AAPL_Highest price')
y = stocks_data_daily['AAPL_Highest price']

In [14]:
model.fit(X, y)

ValueError: Cannot use median strategy with non-numeric data:
could not convert string to float: '2020-01-01'

In [15]:
cv_results = cross_validate(model, X, y, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)

In [16]:
cv_results['test_score']

array([nan, nan, nan, nan, nan])

In [17]:
mae_mean, mae_std = np.mean(-1 * cv_results['test_score']), np.std(-1 * cv_results['test_score'])
mae_mean, mae_std 

(nan, nan)

## 3.6 Calculate Expected Apple Stock Price Based on the Model<a id='5.7_Calculate_Expected_Big_Mountain_Ticket_Price_From_The_Model'></a>

In [18]:
stocks_data_daily_test = stocks_data_daily.copy()

In [24]:
stocks_data_daily_test.head()

Unnamed: 0,Date,AAPL_Highest price,MSFT_Highest price,AMZN_Highest price,FB_Highest price,TSLA_Highest price,NVDA_Highest price,PG_Highest price,PYPL_Highest price,ADBE_Highest price,...,POOL_Highest price,GWW_Highest price,TTWO_Highest price,ABMD_Highest price,JBHT_Highest price,MAS_Highest price,FBHS_Highest price,CHRW_Highest price,PENN_Highest price,ROL_Highest price
0,2020-01-01,75.15,160.73,1898.01,209.79,86.1391,59.9775,124.73,111.21,334.48,...,213.88,344.71,123.98,173.86,117.99,48.41,65.81,78.5,26.15,22.3933
1,2020-01-02,75.145,159.945,1886.1965,210.4,90.8,59.4575,123.53,110.42,332.98,...,215.68,342.26,122.5,169.3174,117.21,47.92,65.5,77.8,25.965,22.1733
2,2020-01-05,74.99,159.1,1903.69,212.78,90.312,59.3175,123.19,110.22,333.91,...,215.34,339.85,125.1264,179.42,116.1998,47.625,65.89,77.62,26.33,22.1933
3,2020-01-06,75.225,159.67,1913.89,214.58,94.326,60.4425,123.2059,111.56,334.79,...,214.68,339.3,126.47,182.44,117.86,47.54,65.9009,78.44,26.48,22.2667
4,2020-01-07,76.11,160.8,1910.9999,216.24,99.698,60.51,123.4299,112.64,339.23,...,215.27,343.11,128.995,180.73,119.41,47.93,67.13,79.69,27.075,22.2267


In [25]:
X_bm = stocks_data_daily_test.drop(columns='AAPL_Highest price')

In [26]:
Y_bm_ = stocks_data_daily_test['AAPL_Highest price']

In [27]:
Y_bm = Y_bm_.values.item()

ValueError: can only convert an array of size 1 to a Python scalar

In [28]:
bm_pred = model.predict(X_bm).item()

ValueError: Cannot use median strategy with non-numeric data:
could not convert string to float: '2020-01-01'

In [29]:
print(f'Apple Stock modelled price is ${bm_pred:.2f}, actual price is ${Y_bm:.2f}.')
print(f'Even with the expected mean absolute error of ${mae_mean:.2f}, this suggests there is room for an increase.')

NameError: name 'bm_pred' is not defined

# Test month

In [30]:
stocks_data_daily_test_month = stocks_data("2021-03-01", "2021-03-30",tickers)

NameError: name 'stocks_data' is not defined

In [31]:
stocks_data_daily_test_month.head()

NameError: name 'stocks_data_daily_test_month' is not defined

In [32]:
stocks_data_daily_test_month.loc[:stocks_data_daily_test_month.index[4]] 

NameError: name 'stocks_data_daily_test_month' is not defined

In [33]:
stocks_data_daily_test_month.iloc[0].to_frame().T

NameError: name 'stocks_data_daily_test_month' is not defined

In [34]:
stocks_data_daily_test_month.index

NameError: name 'stocks_data_daily_test_month' is not defined

In [35]:
def model_test(df):
    df.drop(columns='AAPL_Highest price')
    y_bm = df['AAPL_Highest price']
    forcast_df = []
    for row in range(len(df)):
        bm_pred = round(model.predict(stocks_data_daily_test_month.iloc[row].to_frame().T).item(),2)
        actual = round(y_bm[row],2)
        #date = df.index[row]
        #print(date)
        forcast_df.append([actual,bm_pred])
    return pd.DataFrame(data= forcast_df, columns=['Actual', 'Predicted'])

In [36]:
model_test(stocks_data_daily_test_month)

NameError: name 'stocks_data_daily_test_month' is not defined