In [1]:
import pandas as pd
import numpy as np
import fix_yahoo_finance as fyf
from pandas_datareader import data as web
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
#fetching data
fyf.pdr_override()
today_date=datetime.today().strftime('%Y-%m-%d')
yf_data=web.get_data_yahoo('NFLX', start = '2013-01-01', end = today_date)
#bitcoin dataframe
netflix_df=pd.DataFrame(yf_data)
netflix_df.reset_index(inplace=True)
netflix_df

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2013-01-02,13.601429,13.687143,12.955714,13.144286,13.144286,19431300
1,2013-01-03,13.138571,13.988571,13.075714,13.798571,13.798571,27912500
2,2013-01-04,13.791429,13.958571,13.648571,13.711429,13.711429,17761100
3,2013-01-07,13.770000,14.535714,13.731429,14.171429,14.171429,45550400
4,2013-01-08,14.287143,14.427143,13.828571,13.880000,13.880000,24714900
...,...,...,...,...,...,...,...
2127,2021-06-15,501.230011,501.230011,490.399994,491.899994,491.899994,3104100
2128,2021-06-16,495.000000,496.459991,486.279999,492.410004,492.410004,3533200
2129,2021-06-17,490.250000,501.799988,490.149994,498.339996,498.339996,3198300
2130,2021-06-18,496.399994,504.489990,495.239990,500.769989,500.769989,5194200


In [3]:
#checking for null values
netflix_df.rename(columns={"Adj Close": "Adj_Close"}, inplace=True)
netflix_df.isnull().values.any()

False

### **Feature Engineering**

In [4]:
#simple moving average 10,30 days
def SMA(df, x):
  SMA_calc=df.Adj_Close.rolling(window=x, min_periods=x).mean()
  return SMA_calc
netflix_df['SMA10']=SMA(netflix_df,10)
netflix_df['SMA30']=SMA(netflix_df,30)

#exponential moving average 20,30,200 days
def EMA(df, x):
  EMA_calc=df.Adj_Close.ewm(span=x, min_periods=x, adjust=False).mean()
  return EMA_calc

netflix_df['EMA20']=EMA(netflix_df,20)
netflix_df['EMA30']=EMA(netflix_df,30)
netflix_df['EMA200']=EMA(netflix_df,200)
#netflix_df['EMA20']=netflix_df.Adj_Close.ewm(span=20, min_periods=20, adjust=False).mean()

In [5]:
#rate of change ROC 10,30 days
def ROC(df, n):
  ROC_diff=df.Adj_Close.diff(n)
  ROC_shift=df.Adj_Close.shift(n)
  #roc formula
  ROC_calc=((ROC_diff/ROC_shift)*100)
  return ROC_calc
  
netflix_df['ROC10']=ROC(netflix_df,10)
netflix_df['ROC30']=ROC(netflix_df,30)

In [6]:
#volume rate of change 10,30 days
def VROC(df, n):
  VROC_diff=df.Volume.diff(n)
  VROC_shift=df.Volume.shift(n)
  #roc formula
  VROC_calc=((VROC_diff/VROC_shift)*100)
  return VROC_calc
  
netflix_df['VROC10']=VROC(netflix_df,10)
netflix_df['VROC30']=VROC(netflix_df,30)

In [7]:
#relative strength index RSI 14 days
def RSI(df, n=14):
  delta=df.Adj_Close.diff()
  dup, ddown=delta.copy(), delta.copy()
  #average gains and losses
  dup[dup<0]=0
  ddown[ddown>0]=0
  rolup=dup.rolling(window=n, min_periods=n).mean()
  roldown=ddown.rolling(window=n, min_periods=n).mean().abs()
  #relative strength
  RS_calc=rolup/roldown
  #rsi formula
  RSI_calc=(100-(100/(1+RS_calc)))
  return RSI_calc

netflix_df['RSI14']=RSI(netflix_df)

In [8]:
#RSI indicators %30 %70
def RSI_Ind(df):
  rsi_vals=[]
  for i in df['RSI14'].iloc[:]:
    if i <= 30:
      rsi_vals.append(0)
    elif i >= 70:
      rsi_vals.append(1)
    else:
      rsi_vals.append(2)
  return rsi_vals

netflix_df['RSI14_IND']=RSI_Ind(netflix_df)

In [9]:
#STO%K%D Stochastic oscillator 14,200 days
def STOK(close, low, high, n):
  STOK_calc=((close-low.rolling(n).min())/(high.rolling(n).max()-low.rolling(n).min()))*100
  return STOK_calc

netflix_df['STOK14']=STOK(netflix_df.Adj_Close, netflix_df.Low, netflix_df.High, 14)
netflix_df['STOK200']=STOK(netflix_df.Adj_Close, netflix_df.Low, netflix_df.High, 200)

def STOD(stod, n):
  STOD_calc=stod.rolling(3).mean()
  return STOD_calc

netflix_df['STOD14']=STOD(netflix_df.STOK14, 14)
netflix_df['STOD14']=STOD(netflix_df.STOK200, 200)

In [10]:
#bollinger bands upper
def BBandsUp(close, n, mult):
  BBandsUp_calc=close.rolling(window=n, min_periods=n).mean() + close.rolling(window=n, min_periods=n).std() * mult
  return BBandsUp_calc

netflix_df['BBandUpper']=BBandsUp(netflix_df.Adj_Close,20,2)
#bollinger bands lower
def BBandsLow(close, n, mult):
  BBandsLow_calc=close.rolling(window=n, min_periods=n).mean() - close.rolling(window=n, min_periods=n).std() * mult
  return BBandsLow_calc

netflix_df['BBandLower']=BBandsLow(netflix_df.Adj_Close,20,2)

In [11]:
#filling NaNs of feature engineered columns at the end not to alter calculations
netflix_df.fillna(method='bfill', inplace=True)

In [12]:
#dataframe after feature engineering phase
netflix_df

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume,SMA10,SMA30,EMA20,EMA30,EMA200,ROC10,ROC30,VROC10,VROC30,RSI14,RSI14_IND,STOK14,STOK200,STOD14,BBandUpper,BBandLower
0,2013-01-02,13.601429,13.687143,12.955714,13.144286,13.144286,19431300,14.018286,19.336524,17.663839,20.831702,32.623922,5.945002,103.673513,44.173061,64.577254,66.686449,2,51.556854,95.233994,97.397107,24.702632,8.040940
1,2013-01-03,13.138571,13.988571,13.075714,13.798571,13.798571,27912500,14.018286,19.336524,17.663839,20.831702,32.623922,5.945002,103.673513,44.173061,64.577254,66.686449,2,51.556854,95.233994,97.397107,24.702632,8.040940
2,2013-01-04,13.791429,13.958571,13.648571,13.711429,13.711429,17761100,14.018286,19.336524,17.663839,20.831702,32.623922,5.945002,103.673513,44.173061,64.577254,66.686449,2,51.556854,95.233994,97.397107,24.702632,8.040940
3,2013-01-07,13.770000,14.535714,13.731429,14.171429,14.171429,45550400,14.018286,19.336524,17.663839,20.831702,32.623922,5.945002,103.673513,44.173061,64.577254,66.686449,2,51.556854,95.233994,97.397107,24.702632,8.040940
4,2013-01-08,14.287143,14.427143,13.828571,13.880000,13.880000,24714900,14.018286,19.336524,17.663839,20.831702,32.623922,5.945002,103.673513,44.173061,64.577254,66.686449,2,51.556854,95.233994,97.397107,24.702632,8.040940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2127,2021-06-15,501.230011,501.230011,490.399994,491.899994,491.899994,3104100,492.409998,494.945331,495.468385,498.510097,501.116116,-1.438646,-3.380407,25.034238,-24.140375,41.190743,2,32.950644,24.723435,25.926201,508.170545,482.828451
2128,2021-06-16,495.000000,496.459991,486.279999,492.410004,492.410004,3533200,491.726999,494.586331,495.177111,498.116543,501.029488,-1.368077,-2.140385,55.716175,-18.767674,40.625613,2,34.672487,25.102090,26.827039,507.817443,483.794554
2129,2021-06-17,490.250000,501.799988,490.149994,498.339996,498.339996,3198300,492.617999,494.661665,495.478338,498.130959,501.002727,1.820486,0.455574,-17.726501,2.201700,45.200012,2,54.692720,29.504787,26.443438,507.766116,484.909879
2130,2021-06-18,496.399994,504.489990,495.239990,500.769989,500.769989,5194200,493.220999,494.702332,495.982305,498.301219,501.000411,1.218822,0.244220,64.827214,37.278325,48.267656,2,80.060089,31.308926,28.638601,507.639506,484.946487


### **Data Visualization**

In [13]:
#NFLX Chart
candle_fig=make_subplots(rows=2, cols=1,
                         row_heights=[0.7,0.3],
                         vertical_spacing=0.08,
                         )

#BBands traces
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['BBandUpper'],
                                mode='lines', name='BBandUpper', 
                                marker_color='rgb(175, 175, 182)',
                                ),row=1, col=1)
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['BBandLower'],
                                mode='lines', name='BBandLower', 
                                marker_color='rgb(175, 175, 182)',
                                fillcolor='rgba(175, 175, 182, 0.2)',
                                fill='tonexty'
                                ),row=1, col=1)
#Candle trace
candle_fig.add_trace(go.Candlestick(x=netflix_df['Date'],
                                    open=netflix_df['Open'],
                                    high=netflix_df['High'],
                                    low=netflix_df['Low'],
                                    close=netflix_df['Close'],
                                    name='NFLX',
                                    increasing_line_color= 'rgb(0, 230, 0)',
                                    decreasing_line_color= 'rgb(255, 0, 0)'
                                    ),row=1, col=1)
#SMA traces
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['SMA10'],
                                mode='lines', name='SMA10', 
                                marker_color='rgb(255, 153, 0)',
                                visible='legendonly'
                                ),row=1, col=1)
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['SMA30'],
                                mode='lines', name='SMA30', 
                                marker_color='rgb(255, 191, 0)',
                                visible='legendonly'
                                ),row=1, col=1)
#EMA traces
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['EMA20'],
                                mode='lines', name='EMA20', 
                                marker_color='rgb(128, 255, 0)',
                                visible='legendonly'
                                ),row=1, col=1)
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['EMA30'],
                                mode='lines', name='EMA30', 
                                marker_color='rgb(64, 255, 0)',
                                visible='legendonly'
                                ),row=1, col=1)
candle_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['EMA200'],
                                mode='lines', name='EMA200', 
                                marker_color='rgb(0, 255, 64)'
                                ),row=1, col=1)
#Volume trace
candle_fig.add_trace(go.Bar(x=netflix_df['Date'],
                            y=netflix_df['Volume'],
                            name='Volume',
                            marker_color='rgb(255, 255, 0)'
                            ),row=2, col=1)

#customization
candle_fig.update_layout(
    template='plotly_dark',
    yaxis=dict(title_text='Currency in USD', title_standoff=5),
    autosize=False,
    width=1400,
    height=800,
    xaxis_rangeslider_visible=False,
    title_text='Netflix(NFLX) NasdaqGS - Chart',
    title_x=0.5)
candle_fig.update_yaxes(title_text='Volume', row=2, col=1,
                        title_standoff=1)

candle_fig.show()

In [14]:
#RSI Chart
rsi_fig=make_subplots(rows=1, cols=2,
                      column_widths=[0.7,0.3],
                      horizontal_spacing=0.05,
                      )
#rsi14 trace
rsi_fig.add_trace(go.Scatter(x=netflix_df['Date'],
                                y=netflix_df['RSI14'],
                                mode='lines', name='RSI14', 
                                marker_color='rgb(26, 140, 255)',
                                ),row=1, col=1)

#indicators trace
ind=netflix_df.RSI14_IND.to_list()
oversold_list=list(filter(lambda a: a == 0, ind))
rsi_fig.add_trace(go.Histogram(x=oversold_list,
                               name='Oversold',
                               marker_color='rgb(255, 255, 255)',
                               marker_line_color='rgb(26, 140, 255)',
                               marker_line_width=2,
                               opacity=0.75
                               ),row=1, col=2)
overvalued_list=list(filter(lambda a: a == 1, ind))
rsi_fig.add_trace(go.Histogram(x=overvalued_list,
                               name='Overbought',
                               marker_color='rgb(128, 128, 128)',
                               marker_line_color='rgb(26, 140, 255)',
                               marker_line_width=2,
                               opacity=0.75
                               ),row=1, col=2)
#customization
rsi_fig.update_layout(
    template='plotly_dark',
    yaxis=dict(title_text='RSI',
               title_standoff=5,
               tickvals=[30,70]),
    autosize=False,
    width=1400,
    height=600,
    title_text='Netflix(NFLX) Relative Strength Index (14 Days) & Indicators - Chart',
    title_x=0.5)
rsi_fig.update_xaxes(visible=False, row=1, col=2)

rsi_fig.show()

### Machine Learning (Forecasting)

In [15]:
#computing pearson correlation of columns in fe dataframe
corr=netflix_df.corr(method='pearson')
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Open,High,Low,Close,Adj_Close,Volume,SMA10,SMA30,EMA20,EMA30,EMA200,ROC10,ROC30,VROC10,VROC30,RSI14,RSI14_IND,STOK14,STOK200,STOD14,BBandUpper,BBandLower
Open,1.0,0.999727,0.999717,0.999399,0.999399,-0.437201,0.998131,0.994647,0.997401,0.996148,0.980732,-0.066529,-0.159246,0.002415,-0.023225,-0.052913,0.147903,-0.021845,-0.07657,-0.083686,0.995551,0.993232
High,0.999727,1.0,0.999641,0.999731,0.999731,-0.433651,0.998127,0.994714,0.997432,0.996196,0.980765,-0.06527,-0.158768,0.007256,-0.018394,-0.051855,0.148046,-0.018851,-0.076521,-0.084718,0.995827,0.99299
Low,0.999717,0.999641,1.0,0.999716,0.999716,-0.440803,0.997954,0.994476,0.997241,0.995989,0.980833,-0.063658,-0.157681,-0.001249,-0.027171,-0.049373,0.147847,-0.015622,-0.074638,-0.083229,0.995081,0.993397
Close,0.999399,0.999731,0.999716,1.0,1.0,-0.436826,0.997857,0.994406,0.997165,0.995913,0.980629,-0.062073,-0.157033,0.004155,-0.021631,-0.048265,0.147824,-0.012212,-0.074133,-0.083853,0.995274,0.993011
Adj_Close,0.999399,0.999731,0.999716,1.0,1.0,-0.436826,0.997857,0.994406,0.997165,0.995913,0.980629,-0.062073,-0.157033,0.004155,-0.021631,-0.048265,0.147824,-0.012212,-0.074133,-0.083853,0.995274,0.993011
Volume,-0.437201,-0.433651,-0.440803,-0.436826,-0.436826,1.0,-0.438513,-0.437657,-0.438464,-0.437862,-0.435614,0.230343,0.2319,0.524176,0.52488,0.05795,-0.129058,0.028609,0.096645,0.114063,-0.429979,-0.446738
SMA10,0.998131,0.998127,0.997954,0.997857,0.997857,-0.438513,1.0,0.99748,0.999442,0.998549,0.983621,-0.098618,-0.173168,0.000588,-0.02396,-0.079574,0.147747,-0.055329,-0.092236,-0.097546,0.997877,0.996209
SMA30,0.994647,0.994714,0.994476,0.994406,0.994406,-0.437657,0.99748,1.0,0.999229,0.999709,0.988481,-0.12303,-0.205416,-0.002452,-0.025643,-0.117619,0.147569,-0.080586,-0.120026,-0.125752,0.997451,0.997557
EMA20,0.997401,0.997432,0.997241,0.997165,0.997165,-0.438464,0.999442,0.999229,1.0,0.999752,0.986776,-0.108198,-0.187439,-0.000578,-0.024619,-0.096313,0.147571,-0.064837,-0.105299,-0.110968,0.998333,0.997446
EMA30,0.996148,0.996196,0.995989,0.995913,0.995913,-0.437862,0.998549,0.999709,0.999752,1.0,0.989049,-0.114327,-0.197538,-0.001358,-0.025144,-0.106049,0.147701,-0.072154,-0.117379,-0.123102,0.99796,0.997485


In [16]:
#selecting only columns that have strongest linear associations
netflix_ml_df=netflix_df[['Adj_Close', 'SMA10', 'SMA30', 'EMA20', 'EMA30', 'EMA200', 'BBandUpper', 'BBandLower']]
corr2=netflix_ml_df.corr(method='pearson')
corr2.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Adj_Close,SMA10,SMA30,EMA20,EMA30,EMA200,BBandUpper,BBandLower
Adj_Close,1.0,0.997857,0.994406,0.997165,0.995913,0.980629,0.995274,0.993011
SMA10,0.997857,1.0,0.99748,0.999442,0.998549,0.983621,0.997877,0.996209
SMA30,0.994406,0.99748,1.0,0.999229,0.999709,0.988481,0.997451,0.997557
EMA20,0.997165,0.999442,0.999229,1.0,0.999752,0.986776,0.998333,0.997446
EMA30,0.995913,0.998549,0.999709,0.999752,1.0,0.989049,0.99796,0.997485
EMA200,0.980629,0.983621,0.988481,0.986776,0.989049,1.0,0.983474,0.985479
BBandUpper,0.995274,0.997877,0.997451,0.998333,0.99796,0.983474,1.0,0.992495
BBandLower,0.993011,0.996209,0.997557,0.997446,0.997485,0.985479,0.992495,1.0


In [17]:
#getting correct indexs
last_ind=netflix_ml_df.tail(1).index.item()
starting_ind=last_ind-63
#only last 3 months worth of data (63 days on average)
netflix_mlr_df=netflix_ml_df.loc[starting_ind:last_ind, :]

In [18]:
netflix_mlr_df.reset_index(inplace=True)
netflix_mlr_df.drop('index', axis=1, inplace=True)
netflix_mlr_df



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Adj_Close,SMA10,SMA30,EMA20,EMA30,EMA200,BBandUpper,BBandLower
0,523.109985,516.086005,533.389006,523.039443,526.167156,495.018395,559.205503,489.408505
1,535.090027,518.951007,532.961340,524.187118,526.742825,495.417118,557.527521,489.980487
2,520.809998,520.578006,531.686007,523.865488,526.360062,495.669783,552.883317,491.364693
3,502.859985,518.558005,529.661672,521.864964,524.843928,495.741328,549.544119,490.319889
4,508.049988,517.561002,528.010337,520.549252,523.760448,495.863802,547.049797,489.734212
...,...,...,...,...,...,...,...,...
59,491.899994,492.409998,494.945331,495.468385,498.510097,501.116116,508.170545,482.828451
60,492.410004,491.726999,494.586331,495.177111,498.116543,501.029488,507.817443,483.794554
61,498.339996,492.617999,494.661665,495.478338,498.130959,501.002727,507.766116,484.909879
62,500.769989,493.220999,494.702332,495.982305,498.301219,501.000411,507.639506,484.946487


In [29]:
y_lr=netflix_mlr_df['Adj_Close']
X_lr=netflix_mlr_df[['SMA10', 'SMA30', 'EMA20', 'EMA30', 'EMA200', 'BBandUpper', 'BBandLower']]

In [30]:
#multiple linear regression
from sklearn.linear_model import LinearRegression
linear_regression=LinearRegression()

In [49]:
x_train, x_test,y_train,y_test = train_test_split(X_lr,y_lr,test_size=0.1)
#fitting
linear_regression.fit(x_train, y_train)
#prediction
pred=linear_regression.predict(x_test)

#score
linear_regression.score(X_lr, y_lr)

0.9145575555921714

In [50]:
co=linear_regression.coef_
co

array([-2.91968851, -0.41948696, 13.14401423, -7.14888872, -0.41379767,
       -1.01791537, -0.91560506])

In [51]:
inter=linear_regression.intercept_
inter

358.11922076955375

[<img src="https://csharpcorner-mindcrackerinc.netdna-ssl.com/article/linear-regression2/Images/f_MLR.png">]

In [41]:
y=inter+co[0]*(64)+co[1]*(64)+co[2]*(64)+co[3]*(64)+co[4]*(64)+co[5]*(64)+co[6]*(64)
y

463.0939144539567

In [36]:
pred

array([522.09153817, 495.03581818, 545.38202517, 503.23437408,
       495.88397122, 526.26999634, 498.90574136])

In [37]:
y_test

1     535.090027
59    491.899994
16    540.020020
25    505.549988
61    498.339996
8     539.419983
44    502.899994
Name: Adj_Close, dtype: float64