In [1]:
import pandas as pd
import numpy as np

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import autocorrelation_plot
sns.set_style("dark")

# Data preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from pandas_datareader.data import DataReader
import datetime as dt

# libraries for pipeline|
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

#metrics
from sklearn.metrics import mean_squared_error

# Models to try
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC

# custom functions
import functions

# Market Data Gathering

In [2]:
# Important dates
start = dt.datetime(2017,8,6)
end = dt.datetime(2022,8,6)

In [3]:
# import data
nflx = DataReader("NFLX",  "yahoo", start, end)
gspc = DataReader("^GSPC", "yahoo", start, end)

In [4]:
# concatenate stock and market data
features = functions.concatRename(nflx, 'nflx_', gspc, 'gspc_')

In [5]:
# remove test data from train
train, test = functions.testSplit(features)

In [6]:
# Establish variables and target
nflx_target = 'nflx_Adj Close'
gspc_target = 'gspc_Adj Close'

# Remove target variable
x_train = functions.dropTarget(train, nflx_target)
x_test = functions.dropTarget(test, nflx_target)
y_train = train[nflx_target]
y_test = test[nflx_target]

In [7]:
x_train.shape

(1008, 11)

In [8]:
x_test.shape

(251, 11)

# Feature Engineering

In [9]:
# function transform nflx engineering functions
# t_shiftTime = FunctionTransformer(functions.shiftTime(train))
t_rollingMeanShift = FunctionTransformer(functions.rollingMeanShift(train))
t_trendDiff = FunctionTransformer(functions.trendDiff(train))

In [10]:
feature_engineering = Pipeline([
    ('t_nflx_shiftTime', functions.shiftTime),
    # ('t_nflx_rollingMeanShift', t_rollingMeanShift),
    # ('t_nflx_trendDiff', t_trendDiff)
    ], remainder='passthrough')

In [11]:
# # function transform gspc engineering functions
# t_gspc_shiftTime = FunctionTransformer(functions.shiftTime(train, gspc_target))
# t_gspc_rollingMeanShift = FunctionTransformer(functions.rollingMeanShift(train, gspc_target))
# t_gspc_trendDiff = FunctionTransformer(functions.trendDiff(train, gspc_target))

In [12]:
# gspc_engineering = Pipeline([
#     ('t_gspc_shiftTime', t_gspc_shiftTime),
#     ('t_gspc_rollingMeanShift', t_gspc_rollingMeanShift),
#     ('t_gspc_trendDiff', t_gspc_trendDiff)
#     ])

In [13]:
# dropping NA values
t_dropNa = FunctionTransformer(functions.dropNa)

# drop target variable
t_dropTarget = FunctionTransformer(functions.dropTarget)


In [14]:
feature_engineering = Pipeline([
    ('feature_engineering', feature_engineering),
    ])

In [15]:
pipeline = Pipeline([
    ('feature_engineering', feature_engineering),
    ('dropna', t_dropNa),
    ('scaling', StandardScaler()),
    ('select_best', SelectKBest(f_regression,k=4)),
    ('classifier', LinearRegression())
    ], remmainder='passthrough')

In [16]:
model = pipeline.fit(x_train, y_train)

TypeError: Last step of Pipeline should implement fit or be the string 'passthrough'. '<class 'functions.shiftTime'>' (type <class 'type'>) doesn't

In [None]:
model.predict(x_test)

array([519.93165419, 515.13765257, 513.43882744, 509.15958829,
       518.98343066, 519.6159568 , 518.85444129, 524.18338946,
       543.05446233, 545.52773089, 552.04683535, 552.87154219,
       548.64361163, 551.22719768, 560.0288754 , 564.72025031,
       565.30277002, 586.97499446, 595.98236645, 588.64534604,
       609.76586392, 607.00241193, 601.03251326, 603.81094813,
       586.08609811, 579.46222098, 581.21259912, 581.68498377,
       584.28674858, 575.88939991, 574.15661451, 592.70994258,
       596.41803448, 585.8720597 , 583.83602992, 590.61540794,
       605.24655719, 616.94365526, 607.67219355, 609.00264032,
       634.19331011, 636.22448775, 636.62859174, 639.35254031,
       633.14548375, 628.04036079, 624.15526084, 631.66569137,
       628.96131212, 628.11405196, 636.50431317, 628.54073504,
       649.16887027, 662.87655082, 668.30656577, 667.2202547 ,
       665.22742723, 673.32113031, 686.15760684, 679.81269752,
       679.40645008, 687.06514894, 669.746172  , 650.30

In [None]:
mean_squared_error

In [None]:
train

Unnamed: 0_level_0,nflx_High,nflx_Low,nflx_Open,nflx_Close,nflx_Volume,nflx_Adj Close,gspc_High,gspc_Low,gspc_Open,gspc_Close,gspc_Volume,gspc_Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-08-07,182.270004,179.100006,181.000000,181.330002,4609800,181.330002,2480.949951,2475.879883,2477.139893,2480.909912,2931780000,2480.909912
2017-08-08,181.910004,177.449997,181.369995,178.360001,6104000,178.360001,2490.870117,2470.320068,2478.350098,2474.919922,3344640000,2474.919922
2017-08-09,175.960007,170.009995,171.429993,175.779999,9670500,175.779999,2474.409912,2462.080078,2465.350098,2474.020020,3308060000,2474.020020
2017-08-10,174.449997,167.600006,174.029999,169.139999,9693100,169.139999,2465.379883,2437.750000,2465.379883,2438.209961,3621070000,2438.209961
2017-08-11,172.580002,169.000000,169.860001,171.399994,5022300,171.399994,2448.090088,2437.850098,2441.040039,2441.320068,3159930000,2441.320068
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-02,519.849976,510.510010,519.000000,515.150024,2096600,515.150024,4422.180176,4384.810059,4406.859863,4387.160156,2919940000,4387.160156
2021-08-03,515.630005,505.369995,514.390015,510.820007,2579400,510.820007,4423.790039,4373.000000,4392.740234,4423.149902,3305340000,4423.149902
2021-08-04,517.979980,510.369995,513.000000,517.349976,2039400,517.349976,4416.169922,4400.229980,4415.950195,4402.660156,3382620000,4402.660156
2021-08-05,525.409973,514.020020,517.130005,524.890015,2556700,524.890015,4429.759766,4408.859863,4408.859863,4429.100098,2734220000,4429.100098
