In [2]:
#Install the libraries and the dependencies
#Import Data analysis libraries
import numpy as np
import pandas as pd
#Import Machine Learning Libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
#Import simple plot library
import matplotlib.pyplot as plt
#Import iPlot libraries
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
#Import error library
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import max_error
#Use cufflinks offline
cf.go_offline()
#Jupyter interactive plot
%matplotlib inline

# NETFLIX 10% FUTURE 25% TEST

In [69]:
#Load the data
netflix = pd.read_csv('NFLX.csv')

#Print out statistical features of the dat set
print('"Statistical features of Netflix"')
print(netflix.describe())
print()

#Print out the file's rows and columns
print("Netflix's data set rows,columns")
print(netflix.shape)
print()

#Convert object type to float
netflix['Close'] = pd.to_numeric(netflix['Close'], errors='coerce')

#Plot the stock data
netflix.iplot(x='Date',y='Close',title='Netflix Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
netflix = netflix[['Close']]

#Create a variable to predict the 'x' days out into the future 10%
future_days = 103

#Create a new column (target) shifted 'x' units/days up
netflix['Prediction']= netflix[['Close']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xnetflix = np.array(netflix.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ynetflix = np.array(netflix['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_netflix, x_test_netflix, y_train_netflix, y_test_netflix = train_test_split(Xnetflix, Ynetflix, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_netflix = DecisionTreeRegressor().fit(x_train_netflix,y_train_netflix)
#Create the linear regression model
lr_netflix = LinearRegression().fit(x_train_netflix, y_train_netflix)

#Get the last 'x' rows of the feature data set
x_future_netflix = netflix.drop(['Prediction'],1)[:-future_days]
x_future_netflix = x_future_netflix.tail(future_days)
x_future_netflix = np.array(x_future_netflix)
x_future_netflix

#Show the model tree prediction
tree_prediction_netflix = tree_netflix.predict(x_future_netflix)
print('Tree prediction preview')
print(tree_prediction_netflix)
print()
#Show the model linear regression prediction
lr_prediction_netflix = lr_netflix.predict(x_future_netflix)
print('Linear regression prediction preview')
print(lr_prediction_netflix)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_netflix = mean_absolute_error(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_netflix,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_netflix = mean_absolute_error(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_netflix,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_netflix = r2_score(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_netflix,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_netflix = r2_score(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_netflix,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_netflix = max_error(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_netflix,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_netflix = max_error(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_netflix,3))
print()


#Visualize the data of tree decision
predictions1_netflix = tree_prediction_netflix
valid1_netflix = netflix[Xnetflix.shape[0]:]
valid1_netflix['Predictions'] = predictions1_netflix
tree_final_netflix=pd.concat([netflix['Close'],valid1_netflix['Predictions']],axis=1)
tree_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_netflix = lr_prediction_netflix
valid2_netflix = netflix[Xnetflix.shape[0]:]
valid2_netflix['Predictions'] = predictions2_netflix
lr_final_netflix=pd.concat([netflix['Close'],valid2_netflix['Predictions']],axis=1)
lr_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Netflix"
             Open        High         Low       Close   Adj Close  \
count  254.000000  254.000000  254.000000  254.000000  254.000000   
mean   371.885157  378.668819  365.765336  372.772992  372.772992   
std     76.348062   77.895003   74.679086   76.540696   76.540696   
min    255.710007  265.000000  252.279999  254.589996  254.589996   
25%    300.890007  305.680007  295.007492  299.822495  299.822495   
50%    363.539994  370.944992  356.449997  363.534988  363.534988   
75%    435.560005  441.442497  427.247490  434.425011  434.425011   
max    567.979980  575.369995  520.960022  548.729980  548.729980   

             Volume  
count  2.540000e+02  
mean   7.685473e+06  
std    4.509276e+06  
min    2.019300e+06  
25%    4.920875e+06  
50%    6.447200e+06  
75%    8.762150e+06  
max    3.825890e+07  

Netflix's data set rows,columns
(254, 7)



Tree prediction preview
[387.779999 370.079987 361.76001  379.959991 372.279999 371.119995
 380.01001  396.720001 413.549988 426.75     439.170013 437.48999
 437.48999  428.149994 421.420013 426.700012 424.98999  421.380005
 403.829987 411.890015 419.850006 415.269989 428.149994 424.679993
 415.269989 437.48999  435.549988 440.519989 431.820007 438.269989
 441.950012 454.190002 413.440002 451.040009 447.670013 436.25
 429.320007 414.769989 419.890015 413.440002 413.440002 425.920013
 427.309998 421.970001 414.329987 418.070007 419.48999  434.049988
 434.480011 425.559998 418.070007 425.559998 436.130005 425.559998
 449.869995 453.720001 468.040009 466.26001  453.720001 465.910004
 453.720001 447.23999  455.040009 495.649994 495.649994 493.809998
 493.809998 502.779999 507.76001  502.410004 488.51001  502.410004
 523.26001  527.390015 527.390015 502.410004 490.100006 455.040009
 477.579987 480.450012 495.649994 488.51001  484.480011 485.799988
 507.76001  498.619995 447.23999  502.10998

TREE

In [59]:
#Install the libraries and the dependencies
#Import Data analysis libraries
import numpy as np
import pandas as pd
#Import Machine Learning Libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
#Import simple plot library
import matplotlib.pyplot as plt
#Import iPlot libraries
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
#Import error library
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import max_error

#Use cufflinks offline
cf.go_offline()

#Jupyter interactive plot
%matplotlib inline

#Load the data
netflix = pd.read_csv('NFLX.csv')

#Print out statistical features of the dat set
print('"Statistical features of Netflix"')
print(netflix.describe())
print()

#Print out the file's rows and columns
print("Netflix's data set rows,columns")
print(netflix.shape)
print()

#Convert object type to float
netflix['Close'] = pd.to_numeric(netflix['Close'], errors='coerce')

#Plot the stock data
netflix.iplot(x='Date',y='Close',title='Netflix Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
netflix = netflix[['Close']]

#Create a variable to predict the 'x' days out into the future 10%
future_days = 13

#Create a new column (target) shifted 'x' units/days up
netflix['Prediction']= netflix[['Close']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xnetflix = np.array(netflix.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ynetflix = np.array(netflix['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_netflix, x_test_netflix, y_train_netflix, y_test_netflix = train_test_split(Xnetflix, Ynetflix, test_size=0.25)

#Create the decision tree regressor model
tree_netflix = DecisionTreeRegressor().fit(x_train_netflix,y_train_netflix)

#Get the last 'x' rows of the feature data set
x_future_netflix = netflix.drop(['Prediction'],1)[:-future_days]
x_future_netflix = x_future_netflix.tail(future_days)
x_future_netflix = np.array(x_future_netflix)
x_future_netflix

#Show the model tree prediction
tree_prediction_netflix = tree_netflix.predict(x_future_netflix)
print('Tree prediction preview')
print(tree_prediction_netflix)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_netflix = mean_absolute_error(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_netflix,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_netflix = r2_score(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_netflix,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_netflix = max_error(netflix['Close'].iloc[-(future_days+1):-1],tree_prediction_netflix.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_netflix,3))
print()

#Visualize the data of tree decision
predictions1_netflix = tree_prediction_netflix
valid1_netflix = netflix[Xnetflix.shape[0]:]
valid1_netflix['Predictions'] = predictions1_netflix
tree_final_netflix=pd.concat([netflix['Close'],valid1_netflix['Predictions']],axis=1)
tree_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


"Statistical features of Netflix"
             Open        High         Low       Close   Adj Close  \
count  254.000000  254.000000  254.000000  254.000000  254.000000   
mean   371.885157  378.668819  365.765336  372.772992  372.772992   
std     76.348062   77.895003   74.679086   76.540696   76.540696   
min    255.710007  265.000000  252.279999  254.589996  254.589996   
25%    300.890007  305.680007  295.007492  299.822495  299.822495   
50%    363.539994  370.944992  356.449997  363.534988  363.534988   
75%    435.560005  441.442497  427.247490  434.425011  434.425011   
max    567.979980  575.369995  520.960022  548.729980  548.729980   

             Volume  
count  2.540000e+02  
mean   7.685473e+06  
std    4.509276e+06  
min    2.019300e+06  
25%    4.920875e+06  
50%    6.447200e+06  
75%    8.762150e+06  
max    3.825890e+07  

Netflix's data set rows,columns
(254, 7)



Tree prediction preview
[483.380005 466.929993 475.470001 481.329987 482.679993 482.350006
 491.869995 484.529999 481.329987 492.309998 509.079987 490.579987
 535.390015]

Mean absolute error for Tree Decision
10.222

r2 score for Tree Decision
-2.52

max error for Tree Decision
44.81



LINEAR

In [60]:
#Install the libraries and the dependencies
#Import Data analysis libraries
import numpy as np
import pandas as pd
#Import Machine Learning Libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
#Import simple plot library
import matplotlib.pyplot as plt
#Import iPlot libraries
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
#Import error library
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import max_error

#Use cufflinks offline
cf.go_offline()

#Jupyter interactive plot
%matplotlib inline

#Load the data
netflix = pd.read_csv('NFLX.csv')

#Print out statistical features of the dat set
print('"Statistical features of Netflix"')
print(netflix.describe())
print()

#Print out the file's rows and columns
print("Netflix's data set rows,columns")
print(netflix.shape)
print()

#Convert object type to float
netflix['Close'] = pd.to_numeric(netflix['Close'], errors='coerce')

#Plot the stock data
netflix.iplot(x='Date',y='Close',title='Netflix Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
netflix = netflix[['Close']]

#Create a variable to predict the 'x' days out into the future 10%
future_days = 13

#Create a new column (target) shifted 'x' units/days up
netflix['Prediction']= netflix[['Close']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xnetflix = np.array(netflix.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ynetflix = np.array(netflix['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_netflix, x_test_netflix, y_train_netflix, y_test_netflix = train_test_split(Xnetflix, Ynetflix, test_size=0.25)

#Create the linear regression model
lr_netflix = LinearRegression().fit(x_train_netflix, y_train_netflix)

#Get the last 'x' rows of the feature data set
x_future_netflix = netflix.drop(['Prediction'],1)[:-future_days]
x_future_netflix = x_future_netflix.tail(future_days)
x_future_netflix = np.array(x_future_netflix)
x_future_netflix

#Show the model linear regression prediction
lr_prediction_netflix = lr_netflix.predict(x_future_netflix)
print('Linear regression prediction preview')
print(lr_prediction_netflix)
print()

#Compute the mean absolute error for Linear Regression predictions
mae_lr_netflix = mean_absolute_error(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_netflix,3))
print()

#Compute the r2 score for Linear Regression predictions
r2_lr_netflix = r2_score(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_netflix,3))
print()

#Compute the max error for Linear Regression predictions
maxerr_lr_netflix = max_error(netflix['Close'].iloc[-(future_days+1):-1],lr_prediction_netflix.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_netflix,3))
print()

#Visualize the data of linear regression
predictions2_netflix = lr_prediction_netflix
valid2_netflix = netflix[Xnetflix.shape[0]:]
valid2_netflix['Predictions'] = predictions2_netflix
lr_final_netflix=pd.concat([netflix['Close'],valid2_netflix['Predictions']],axis=1)
lr_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Netflix"
             Open        High         Low       Close   Adj Close  \
count  254.000000  254.000000  254.000000  254.000000  254.000000   
mean   371.885157  378.668819  365.765336  372.772992  372.772992   
std     76.348062   77.895003   74.679086   76.540696   76.540696   
min    255.710007  265.000000  252.279999  254.589996  254.589996   
25%    300.890007  305.680007  295.007492  299.822495  299.822495   
50%    363.539994  370.944992  356.449997  363.534988  363.534988   
75%    435.560005  441.442497  427.247490  434.425011  434.425011   
max    567.979980  575.369995  520.960022  548.729980  548.729980   

             Volume  
count  2.540000e+02  
mean   7.685473e+06  
std    4.509276e+06  
min    2.019300e+06  
25%    4.920875e+06  
50%    6.447200e+06  
75%    8.762150e+06  
max    3.825890e+07  

Netflix's data set rows,columns
(254, 7)



Linear regression prediction preview
[495.51094198 483.82268825 486.56333576 501.07811975 494.25999816
 490.41166696 491.67214089 494.61331421 503.91423646 514.43748642
 507.24690166 513.90270385 500.19960855]

Mean absolute error for Linear Regression
12.674

r2 score for Linear Regression
-2.485

max error for Linear Regression
25.608



# TESLA 10% FUTURE 25% TEST

In [14]:
#Load the data
tesla = pd.read_csv('TSLA.csv')

#Print out statistical features of the dat set
print('"Statistical features of Tesla"')
print(tesla.describe())
print()

#Print out the file's rows and columns
print("Tesla's data set rows,columns")
print(tesla.shape)
print()

#Convert object type to float
tesla['Close'] = pd.to_numeric(tesla['Close'], errors='coerce')

#Plot the stock data
tesla.iplot(x='Date',y='Close',title='Tesla Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
tesla = tesla[['Close']]

#Create a variable to predict the 'x' days out into the future (10%)
future_days = 126

#Create a new column (target) shifted 'x' units/days up
tesla['Prediction']= tesla[['Close']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xtesla = np.array(tesla.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ytesla = np.array(tesla['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_tesla, x_test_tesla, y_train_tesla, y_test_tesla = train_test_split(Xtesla, Ytesla, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_tesla = DecisionTreeRegressor().fit(x_train_tesla,y_train_tesla)
#Create the linear regression model
lr_tesla = LinearRegression().fit(x_train_tesla, y_train_tesla)

#Get the last 'x' rows of the feature data set
x_future_tesla = tesla.drop(['Prediction'],1)[:-future_days]
x_future_tesla = x_future_tesla.tail(future_days)
x_future_tesla = np.array(x_future_tesla)
x_future_tesla

#Show the model tree prediction
tree_prediction_tesla = tree_tesla.predict(x_future_tesla)
print('Tree prediction preview')
print(tree_prediction_tesla)
print()
#Show the model linear regression prediction
lr_prediction_tesla = lr_tesla.predict(x_future_tesla)
print('Linear regression prediction preview')
print(lr_prediction_tesla)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_tesla = mean_absolute_error(tesla['Close'].iloc[-(future_days+1):-1],tree_prediction_tesla.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_tesla,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_tesla = mean_absolute_error(tesla['Close'].iloc[-(future_days+1):-1],lr_prediction_tesla.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_tesla,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_tesla = r2_score(tesla['Close'].iloc[-(future_days+1):-1],tree_prediction_tesla.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_tesla,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_tesla = r2_score(tesla['Close'].iloc[-(future_days+1):-1],lr_prediction_tesla.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_tesla,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_tesla = max_error(tesla['Close'].iloc[-(future_days+1):-1],tree_prediction_tesla.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_tesla,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_tesla = max_error(tesla['Close'].iloc[-(future_days+1):-1],lr_prediction_tesla.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_tesla,3))
print()


#Visualize the data of tree decision
predictions1_tesla = tree_prediction_tesla
valid1_tesla = tesla[Xtesla.shape[0]:]
valid1_tesla['Predictions'] = predictions1_tesla
tree_final_tesla=pd.concat([tesla['Close'],valid1_tesla['Predictions']],axis=1)
tree_final_tesla[['Close','Predictions']].iplot(kind='spread',theme='white',title='Tesla Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_tesla = lr_prediction_tesla
valid2_tesla = tesla[Xtesla.shape[0]:]
valid2_tesla['Predictions'] = predictions2_tesla
lr_final_tesla=pd.concat([tesla['Close'],valid2_tesla['Predictions']],axis=1)
lr_final_tesla[['Close','Predictions']].iplot(kind='spread',theme='white',title='Tesla Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Tesla"
              Open         High          Low        Close    Adj Close  \
count  1259.000000  1259.000000  1259.000000  1259.000000  1259.000000   
mean     72.587976    74.162791    70.996232    72.655696    72.655696   
std      55.970599    57.909565    54.109482    56.172243    56.172243   
min      28.464001    30.993999    28.209999    28.733999    28.733999   
25%      45.868000    46.482000    45.109002    45.923000    45.923000   
50%      59.034000    60.048000    57.754002    58.958000    58.958000   
75%      68.990002    69.897998    67.690002    68.935001    68.935001   
max     459.023987   463.697998   437.303986   447.750000   447.750000   

             Volume  
count  1.259000e+03  
mean   4.139873e+07  
std    3.031068e+07  
min    3.540000e+06  
25%    2.204450e+07  
50%    3.181200e+07  
75%    4.941925e+07  
max    3.046940e+08  

Tesla's data set rows,columns
(1259, 7)



Tree prediction preview
[102.611002  149.899994   90.601002   65.556     121.599998   41.784
  37.169998  112.110001  109.323997   89.014      90.893997   72.244003
  85.528      85.505997   86.858002   51.439999  107.849998  105.632004
 116.197998  116.197998   40.801998  130.190002   90.893997   88.601997
 103.248001   63.914001  109.767998  130.190002  130.190002   40.208
 145.966003  149.042007  150.778     149.272003  137.343994  125.6969985
 141.126007   67.919998  107.358     153.824005  160.102005  156.376007
 140.264008  152.238007  153.641998  156.516006  101.345997  163.884003
 162.257996  161.882004  158.192001  160.666     159.834      69.907997
 161.602005   62.716     165.520004  163.376007  163.774002  164.046005
  64.494003  167.        179.619995  176.311996  176.591995   62.02
 177.132004  189.983994   62.02       50.495998  194.567993   70.410004
 198.179993  196.425995   68.206001  200.792007  200.179993  200.356003
 200.356003  192.169998  197.195999  191.947998  

# LUFTHANSA 10% FUTURE 25% TEST

In [5]:
#Load the data
luf = pd.read_csv('LHAG.csv')

#Print out statistical features of the dat set
print('"Statistical features of Lufthansa"')
print(luf.describe())
print()

#Print out the file's rows and columns
print("Lufthansa's data set rows,columns")
print(luf.shape)
print()

#Convert object type to float
luf['Price'] = pd.to_numeric(luf['Price'], errors='coerce')

#Plot the stock data
luf.iplot(x='Date',y='Price',title='Lufthansa Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
luf = luf[['Price']]

#Create a variable to predict the 'x' days out into the future (10%)
future_days = 270

#Create a new column (target) shifted 'x' units/days up
luf['Prediction']= luf[['Price']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xluf = np.array(luf.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Yluf = np.array(luf['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_luf, x_test_luf, y_train_luf, y_test_luf = train_test_split(Xluf, Yluf, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_luf = DecisionTreeRegressor().fit(x_train_luf,y_train_luf)
#Create the linear regression model
lr_luf = LinearRegression().fit(x_train_luf, y_train_luf)

#Get the last 'x' rows of the feature data set
x_future_luf = luf.drop(['Prediction'],1)[:-future_days]
x_future_luf = x_future_luf.tail(future_days)
x_future_luf = np.array(x_future_luf)
x_future_luf

#Show the model tree prediction
tree_prediction_luf = tree_luf.predict(x_future_luf)
print('Tree prediction preview')
print(tree_prediction_luf)
print()
#Show the model linear regression prediction
lr_prediction_luf = lr_luf.predict(x_future_luf)
print('Linear regression prediction preview')
print(lr_prediction_luf)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_luf = mean_absolute_error(luf['Price'].iloc[-271:-1],tree_prediction_luf.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_luf,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_luf = mean_absolute_error(luf['Price'].iloc[-271:-1],lr_prediction_luf.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_luf,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_luf = r2_score(luf['Price'].iloc[-271:-1],tree_prediction_luf.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_luf,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_luf = r2_score(luf['Price'].iloc[-271:-1],lr_prediction_luf.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_luf,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_luf = max_error(luf['Price'].iloc[-271:-1],tree_prediction_luf.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_luf,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_luf = max_error(luf['Price'].iloc[-271:-1],lr_prediction_luf.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_luf,3))
print()


#Visualize the data of tree decision
predictions1_luf = tree_prediction_luf
valid1_luf = luf[Xluf.shape[0]:]
valid1_luf['Predictions'] = predictions1_luf
tree_final_luf=pd.concat([luf['Price'],valid1_luf['Predictions']],axis=1)
tree_final_luf[['Price','Predictions']].iplot(kind='spread',theme='white',title='Lufthansa Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_luf = lr_prediction_luf
valid2_luf = luf[Xluf.shape[0]:]
valid2_luf['Predictions'] = predictions2_luf
lr_final_luf=pd.concat([luf['Price'],valid2_luf['Predictions']],axis=1)
lr_final_luf[['Price','Predictions']].iplot(kind='spread',theme='white',title='Lufthansa Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Lufthansa"
             Price         Open         High          Low
count  2698.000000  2698.000000  2698.000000  2698.000000
mean     15.002817    15.010178    15.197338    14.804759
std       4.707906     4.703818     4.749552     4.658215
min       7.180000     7.278000     7.530000     7.020000
25%      11.831250    11.830000    11.982500    11.662500
50%      14.117500    14.095000    14.290000    13.947500
75%      16.817500    16.847500    17.027500    16.647500
max      31.120000    31.000000    31.260000    30.945000

Lufthansa's data set rows,columns
(2698, 7)



Tree prediction preview
[14.515      14.53       14.21       11.425      14.68       14.415
 14.58       23.27       16.46       12.97       12.97       14.245
 14.175      13.17       13.23       13.1        13.45       16.455
 14.175      13.985      15.23       15.735      11.9465     13.885
 14.945      14.135      14.09       14.535      14.785      15.47
 14.945      13.885      14.505      13.83       18.815      14.7225
 14.215      14.505      14.175      13.83       11.5235      8.562
 11.5235     11.5235     13.935      10.535      14.105      14.125
 11.5235     14.765      14.805      15.095      15.32       15.47
 15.51       14.79       15.735      13.735      15.82       15.895
 15.8        15.63       18.75       15.545      15.755      16.3625
 15.97       16.155      20.56       17.425      17.8        17.715
 15.5        17.51       17.695      17.465      20.64       12.7
 17.2        17.23       17.165      17.235      17.255      13.565
 11.605      14.9125     1

# GOOGLE 10% FUTURE 25% TEST

In [6]:
#Load the data
google = pd.read_csv('GOOGL.csv',thousands=',')

#Print out statistical features of the dat set
print('"Statistical features of Google"')
print(google.describe())
print()

#Print out the file's rows and columns
print("Google's data set rows,columns")
print(google.shape)
print()

#Convert object type to float
google['Price'] = pd.to_numeric(google['Price'], errors='coerce')

#Plot the stock data
google.iplot(x='Date',y='Price',title='Google Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
google = google[['Price']]

#Create a variable to predict the 'x' days out into the future (10%)
future_days = 270

#Create a new column (target) shifted 'x' units/days up
google['Prediction']= google[['Price']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xgoogle = np.array(google.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ygoogle = np.array(google['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_google, x_test_google, y_train_google, y_test_google = train_test_split(Xgoogle, Ygoogle, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_google = DecisionTreeRegressor().fit(x_train_google,y_train_google)
#Create the linear regression model
lr_google = LinearRegression().fit(x_train_google, y_train_google)

#Get the last 'x' rows of the feature data set
x_future_google = google.drop(['Prediction'],1)[:-future_days]
x_future_google = x_future_google.tail(future_days)
x_future_google = np.array(x_future_google)
x_future_google

#Show the model tree prediction
tree_prediction_google = tree_google.predict(x_future_google)
print('Tree prediction preview')
print(tree_prediction_google)
print()
#Show the model linear regression prediction
lr_prediction_google = lr_google.predict(x_future_google)
print('Linear regression prediction preview')
print(lr_prediction_google)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_google= mean_absolute_error(google['Price'].iloc[-271:-1],tree_prediction_google.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_google,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_google = mean_absolute_error(google['Price'].iloc[-271:-1],lr_prediction_google.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_google,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_google = r2_score(google['Price'].iloc[-271:-1],tree_prediction_google.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_google,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_google = r2_score(google['Price'].iloc[-271:-1],lr_prediction_google.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_google,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_google = max_error(google['Price'].iloc[-271:-1],tree_prediction_google.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_google,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_google = max_error(google['Price'].iloc[-271:-1],lr_prediction_google.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_google,3))
print()


#Visualize the data of tree decision
predictions1_google = tree_prediction_google
valid1_google = google[Xgoogle.shape[0]:]
valid1_google['Predictions'] = predictions1_google
tree_final_google=pd.concat([google['Price'],valid1_google['Predictions']],axis=1)
tree_final_google[['Price','Predictions']].iplot(kind='spread',theme='white',title='Google Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_google = lr_prediction_google
valid2_google = google[Xgoogle.shape[0]:]
valid2_google['Predictions'] = predictions2_google
lr_final_google=pd.concat([google['Price'],valid2_google['Predictions']],axis=1)
lr_final_google[['Price','Predictions']].iplot(kind='spread',theme='white',title='Google Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Google"
             Price         Open         High          Low  Unnamed: 7
count  2683.000000  2683.000000  2683.000000  2683.000000         0.0
mean    698.075833   697.962620   704.181088   691.443533         NaN
std     366.682302   366.200282   370.030369   362.653943         NaN
min     218.250000   219.370000   221.360000   217.030000         NaN
25%     334.445000   333.775000   336.540000   330.695000         NaN
50%     592.540000   592.130000   595.640000   589.000000         NaN
75%    1037.335000  1039.345000  1050.950000  1025.665000         NaN
max    1644.130000  1647.990000  1651.750000  1621.710000         NaN

Google's data set rows,columns
(2683, 8)



Tree prediction preview
[1171.08  1171.08  1175.91  1206.19  1265.23  1643.83  1164.25  1164.25
 1169.32  1261.15  1200.44  1183.53  1191.58  1191.52  1225.95  1375.18
 1170.82  1173.75  1356.86  1190.53  1169.55  1182.27  1212.19  1206.32
 1225.95  1205.7   1220.    1234.97  1240.03  1231.63  1238.75  1232.65
 1238.75  1238.75  1257.43  1218.33  1245.94  1242.29  1225.95  1375.18
 1206.    1169.32  1153.46  1210.96  1208.25  1190.13  1175.91  1116.56
 1215.71  1150.51  1242.24  1243.    1252.8   1244.41  1244.28  1241.2
 1257.63  1259.11  1264.3   1288.98  1260.66  1317.32  1258.8   1272.25
 1196.32  1469.535 1125.89  1306.94  1309.    1483.46  1116.79  1296.18
 1513.39  1333.54  1319.84  1312.59  1301.86  1251.3   1293.67  1305.64
 1313.    1312.13  1304.09  1172.27  1294.74  1318.94  1326.96  1339.39
 1342.99  1342.89  1442.    1348.49  1078.63  1087.58  1354.89  1351.91
 1356.44  1351.22  1350.63  1344.43  1125.89  1354.64  1236.13  1218.2
 1169.19  1361.52  1397.81  1070.06  1405.

# APPLE 10% FUTURE 25% TEST

In [13]:
#Load the data
apple = pd.read_csv('AAPL.csv',thousands=',')

#Print out statistical features of the dat set
print('"Statistical features of Apple"')
print(apple.describe())
print()

#Print out the file's rows and columns
print("Apple's data set rows,columns")
print(apple.shape)
print()

#Convert object type to float
apple['Price'] = pd.to_numeric(apple['Price'], errors='coerce')

#Plot the stock data
apple.iplot(x='Date',y='Price',title='Apple Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
apple = apple[['Price']]

#Create a variable to predict the 'x' days out into the future (10%)
future_days = 1072

#Create a new column (target) shifted 'x' units/days up
apple['Prediction']= apple[['Price']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xapple = np.array(apple.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Yapple = np.array(apple['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_apple, x_test_apple, y_train_apple, y_test_apple = train_test_split(Xapple, Yapple, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_apple = DecisionTreeRegressor().fit(x_train_apple,y_train_apple)
#Create the linear regression model
lr_apple = LinearRegression().fit(x_train_apple, y_train_apple)

#Get the last 'x' rows of the feature data set
x_future_apple = apple.drop(['Prediction'],1)[:-future_days]
x_future_apple = x_future_apple.tail(future_days)
x_future_apple = np.array(x_future_apple)
x_future_apple

#Show the model tree prediction
tree_prediction_apple = tree_apple.predict(x_future_apple)
print('Tree prediction preview')
print(tree_prediction_apple)
print()
#Show the model linear regression prediction
lr_prediction_apple = lr_apple.predict(x_future_apple)
print('Linear regression prediction preview')
print(lr_prediction_apple)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_apple= mean_absolute_error(apple['Price'].iloc[-(future_days+1):-1],tree_prediction_apple.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_apple,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_apple = mean_absolute_error(apple['Price'].iloc[-(future_days+1):-1],lr_prediction_apple.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_apple,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_apple = r2_score(apple['Price'].iloc[-(future_days+1):-1],tree_prediction_apple.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_apple,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_apple = r2_score(apple['Price'].iloc[-(future_days+1):-1],lr_prediction_apple.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_apple,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_apple = max_error(apple['Price'].iloc[-(future_days+1):-1],tree_prediction_apple.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_apple,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_apple = max_error(apple['Price'].iloc[-(future_days+1):-1],lr_prediction_apple.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_apple,3))
print()


#Visualize the data of tree decision
predictions1_apple = tree_prediction_apple
valid1_apple = apple[Xapple.shape[0]:]
valid1_apple['Predictions'] = predictions1_apple
tree_final_apple=pd.concat([apple['Price'],valid1_apple['Predictions']],axis=1)
tree_final_apple[['Price','Predictions']].iplot(kind='spread',theme='white',title='Apple Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_apple = lr_prediction_apple
valid2_apple = apple[Xapple.shape[0]:]
valid2_apple['Predictions'] = predictions2_apple
lr_final_apple=pd.concat([apple['Price'],valid2_apple['Predictions']],axis=1)
lr_final_apple[['Price','Predictions']].iplot(kind='spread',theme='white',title='Apple Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Apple"
             Price         Open         High          Low
count  2683.000000  2683.000000  2683.000000  2683.000000
mean    124.019359   123.930939   125.165352   122.745162
std      77.711044    77.483176    78.486619    76.693962
min      27.440000    27.480000    28.000000    27.180000
25%      67.020000    67.130000    67.945000    66.570000
50%     105.680000   105.470000   106.470000   104.700000
75%     165.090000   164.940000   167.315000   163.350000
max     506.090000   514.790000   515.140000   503.040000

Apple's data set rows,columns
(2683, 7)



Tree prediction preview
[100.41  160.5   135.72  ... 293.16  506.09  326.495]

Linear regression prediction preview
[160.89915152 162.3853571  163.21486719 ... 200.05894034 202.59931499
 205.57172615]

Mean absolute error for Tree Decision
16.984

Mean absolute error for Linear Regression
48.468

r2 score for Tree Decision
0.654

r2 score for Linear Regression
0.196

max error for Tree Decision
357.08

max error for Linear Regression
303.371



# DOW 10% FUTURE 25% TEST

In [10]:
#Load the data
dow = pd.read_csv('DOW.csv',thousands=',')

#Print out statistical features of the dat set
print('"Statistical features of Dow Jones"')
print(dow.describe())
print()

#Print out the file's rows and columns
print("Dow Jone's data set rows,columns")
print(dow.shape)
print()

#Convert object type to float
dow['Price'] = pd.to_numeric(dow['Price'], errors='coerce')

#Plot the stock data
dow.iplot(x='Date',y='Price',title='Dow Jones Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Get the close price
dow = dow[['Price']]

#Create a variable to predict the 'x' days out into the future (10%)
future_days = 344

#Create a new column (target) shifted 'x' units/days up
dow['Prediction']= dow[['Price']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
Xdow = np.array(dow.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
Ydow = np.array(dow['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
x_train_dow, x_test_dow, y_train_dow, y_test_dow = train_test_split(Xdow, Ydow, test_size=0.25)

#Create the models
#Create the decision tree regressor model
tree_dow = DecisionTreeRegressor().fit(x_train_dow,y_train_dow)
#Create the linear regression model
lr_dow = LinearRegression().fit(x_train_dow, y_train_dow)

#Get the last 'x' rows of the feature data set
x_future_dow = dow.drop(['Prediction'],1)[:-future_days]
x_future_dow = x_future_dow.tail(future_days)
x_future_dow = np.array(x_future_dow)
x_future_dow

#Show the model tree prediction
tree_prediction_dow = tree_dow.predict(x_future_dow)
print('Tree prediction preview')
print(tree_prediction_dow)
print()
#Show the model linear regression prediction
lr_prediction_dow = lr_dow.predict(x_future_dow)
print('Linear regression prediction preview')
print(lr_prediction_dow)
print()

#Compute the mean absolute error for Tree Regression predictions
mae_tree_dow= mean_absolute_error(dow['Price'].iloc[-345:-1],tree_prediction_dow.tolist())
print('Mean absolute error for Tree Decision')
print(round(mae_tree_dow,3))
print()
#Compute the mean absolute error for Linear Regression predictions
mae_lr_dow = mean_absolute_error(dow['Price'].iloc[-345:-1],lr_prediction_dow.tolist())
print('Mean absolute error for Linear Regression')
print(round(mae_lr_dow,3))
print()

#Compute the r2 score for Tree Regression predictions
r2_tree_dow = r2_score(dow['Price'].iloc[-345:-1],tree_prediction_dow.tolist())
print('r2 score for Tree Decision')
print(round(r2_tree_dow,3))
print()
#Compute the r2 score for Linear Regression predictions
r2_lr_dow = r2_score(dow['Price'].iloc[-345:-1],lr_prediction_dow.tolist())
print('r2 score for Linear Regression')
print(round(r2_lr_dow,3))
print()

#Compute the max error for Tree Regression predictions
maxerr_tree_dow = max_error(dow['Price'].iloc[-345:-1],tree_prediction_dow.tolist())
print('max error for Tree Decision')
print(round(maxerr_tree_dow,3))
print()
#Compute the max error for Linear Regression predictions
maxerr_lr_dow = max_error(dow['Price'].iloc[-345:-1],lr_prediction_dow.tolist())
print('max error for Linear Regression')
print(round(maxerr_lr_dow,3))
print()


#Visualize the data of tree decision
predictions1_dow = tree_prediction_dow
valid1_dow = dow[Xdow.shape[0]:]
valid1_dow['Predictions'] = predictions1_dow
tree_final_dow=pd.concat([dow['Price'],valid1_dow['Predictions']],axis=1)
tree_final_dow[['Price','Predictions']].iplot(kind='spread',theme='white',title='Dow Jones Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
predictions2_dow = lr_prediction_dow
valid2_dow = dow[Xdow.shape[0]:]
valid2_dow['Predictions'] = predictions2_dow
lr_final_dow=pd.concat([dow['Price'],valid2_dow['Predictions']],axis=1)
lr_final_dow[['Price','Predictions']].iplot(kind='spread',theme='white',title='Dow Jones Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')

"Statistical features of Dow Jones"
              Price          Open          High           Low  Unnamed: 7
count   3436.000000   3436.000000   3436.000000   3436.000000         0.0
mean   16579.617861  16576.467846  16683.818868  16464.249930         NaN
std     5676.989304   5676.819525   5680.625129   5670.458307         NaN
min     6547.050000   6547.010000   6758.440000   6440.080000         NaN
25%    12258.725000  12258.472500  12365.387500  12133.145000         NaN
50%    15616.885000  15614.180000  15654.990000  15519.190000         NaN
75%    20692.305000  20681.382500  20764.797500  20620.222500         NaN
max    29551.420000  29440.470000  29568.570000  29406.750000         NaN

Dow Jone's data set rows,columns
(3436, 8)



Tree prediction preview
[27492.56 26511.05 27137.04 26597.05 26462.08 26543.33 26554.39 26592.91
 26430.14 26307.79 26504.95 26438.48 25965.09 25967.33 25828.36 25942.37
 25324.99 25532.05 25648.02 25862.68 27934.02 25679.9  25877.33 25776.61
 25490.47 25585.69 28376.96 25126.41 25169.88 28907.05 26085.8  25332.18
 25539.57 25539.57 25983.94 26062.68 26048.51 26004.83 25332.18 26089.61
 20704.91 26465.54 26504.   26753.17 26719.13 26378.19 26548.22 26536.82
 26526.58 26599.96 26805.53 26786.68 26966.   27001.98 26966.   26783.49
 26860.2  27088.08 27332.03 27359.16 26935.07 27219.85 26573.72 27154.2
 27171.9  27349.19 27269.97 27140.98 27192.45 27221.35 27198.02 27359.16
 26583.42 26496.67 25717.74 26029.52 26007.07 26378.19 26287.44 27182.45
 26279.91 25479.42 25579.39 25886.01 26135.79 25962.44 26202.73 26548.22
 25628.9  25898.83 25777.9  26036.1  26362.25 26403.28 26118.02 26355.47
 26728.15 27182.45 26462.08 26909.43 27137.04 27182.45 27219.52 27076.82
 27110.8  27781.96 27094.79 

In [54]:
#Load the data
netflix = pd.read_csv('NFLX.csv')

#Print out statistical features of the dataset
print('"Statistical features of Netflix"')
print(netflix.describe())
print()

#Print out the file's rows and columns
print("Netflix's data set rows,columns")
print(netflix.shape)
print()

#Convert object type to float
netflix['Close'] = pd.to_numeric(netflix['Close'], errors='coerce')

#Plot the stock data
netflix.iplot(x='Date',y='Close',title='Netflix Stock',xTitle='Date',yTitle='Close Price ($)',fill=True,colors=['green'],theme='white')

#Make a DataFrame to store r2 values
p1 = pd.DataFrame(columns=['Days','r2 Linear Regression'],dtype='float')
p2 = pd.DataFrame(columns=['Days','r2 Tree Decision'],dtype='float')
p3= pd.DataFrame(columns=['Days','mae LR'],dtype='float')
p4 = pd.DataFrame(columns=['Days','mae Tree'],dtype='float')

#Create a variable to predict the 'x' days out into the future
for i in range(10,100):
    future_days = i
    
#Get the close price
    netflix = netflix[['Close']]
    
#Create a new column (target) shifted 'x' units/days up
    netflix['Prediction']= netflix[['Close']].shift(-future_days)

#Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
    Xnetflix = np.array(netflix.drop(['Prediction'],axis=1)[:-future_days])

#Create the target data set (Y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days
    Ynetflix = np.array(netflix['Prediction'])[:-future_days]

#Split the data into 75% training and 25% testing
    x_train_netflix, x_test_netflix, y_train_netflix, y_test_netflix = train_test_split(Xnetflix, Ynetflix, test_size=0.25)

#Create the models
#Create the decision tree regressor model
    tree_netflix = DecisionTreeRegressor().fit(x_train_netflix,y_train_netflix)
#Create the linear regression model
    lr_netflix = LinearRegression().fit(x_train_netflix, y_train_netflix)

#Get the last 'x' rows of the feature data set
    x_future_netflix = netflix.drop(['Prediction'],1)[:-future_days]
    x_future_netflix = x_future_netflix.tail(future_days)
    x_future_netflix = np.array(x_future_netflix)
    #x_future_netflix

#Show the model tree prediction
    tree_prediction_netflix = tree_netflix.predict(x_future_netflix)
    #print('Tree prediction preview')
    #print(tree_prediction_netflix)
    #print()
#Show the model linear regression prediction
    lr_prediction_netflix = lr_netflix.predict(x_future_netflix)
    #print('Linear regression prediction preview')
    #print(lr_prediction_netflix)
    #print()

#Compute the mean absolute error for Tree Regression predictions
    mae_tree_netflix = mean_absolute_error(netflix['Close'].iloc[-(i+1):-1],tree_prediction_netflix.tolist())
    #print('Mean absolute error for Tree Regression')
    #print(round(mae_tree_netflix,3))
    #print()
#Compute the mean absolute error for Linear Regression predictions
    mae_lr_netflix = mean_absolute_error(netflix['Close'].iloc[-(i+1):-1],lr_prediction_netflix.tolist())
    #print('Mean absolute error for Linear Regression')
    #print(round(mae_lr_netflix,3))
    #print()

#Compute the r2 score for Tree Regression predictions
    r2_tree_netflix = r2_score(netflix['Close'].iloc[-(i+1):-1],tree_prediction_netflix.tolist())
    #print('r2 score for Tree Regression')
    #print(round(r2_tree_netflix,3))
    #print()
#Compute the r2 score for Linear Regression predictions
    r2_lr_netflix = r2_score(netflix['Close'].iloc[-(i+1):-1],lr_prediction_netflix.tolist())
    #print('r2 score for Linear Regression')
    #print(round(r2_lr_netflix,3))
    #print()

#Compute the max error for Tree Regression predictions
    maxerr_tree_netflix = max_error(netflix['Close'].iloc[-(i+1):-1],tree_prediction_netflix.tolist())
    #print('max error for Tree Regression')
    #print(round(maxerr_tree_netflix,3))
    #print()
#Compute the max error for Linear Regression predictions
    maxerr_lr_netflix = max_error(netflix['Close'].iloc[-(i+1):-1],lr_prediction_netflix.tolist())
    #print('max error for Linear Regression')
    #print(round(maxerr_lr_netflix,3))
    #print()


#Visualize the data of tree decision
    #predictions1_netflix = tree_prediction_netflix
    #valid1_netflix = netflix[Xnetflix.shape[0]:]
    #valid1_netflix['Predictions'] = predictions1_netflix
    #tree_final_netflix=pd.concat([netflix['Close'],valid1_netflix['Predictions']],axis=1)
    #tree_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Pediction with Tree Decision',xTitle='Days',yTitle='Close Price ($)')


#Visualize the data of linear regression
    #predictions2_netflix = lr_prediction_netflix
    #valid2_netflix = netflix[Xnetflix.shape[0]:]
    #valid2_netflix['Predictions'] = predictions2_netflix
    #lr_final_netflix=pd.concat([netflix['Close'],valid2_netflix['Predictions']],axis=1)
    #lr_final_netflix[['Close','Predictions']].iplot(kind='spread',theme='white',title='Netflix Stock Prediction with Linear Regression ',xTitle='Days',yTitle='Close Price ($)')      
         
    p1 = p1.append({'Days':future_days,'r2 Linear Regression':r2_lr_netflix},ignore_index=True)
    p2 = p2.append({'Days':future_days,'r2 Tree Decision':r2_tree_netflix},ignore_index=True)
    p3 = p3.append({'Days':future_days,'mae LR':mae_lr_netflix},ignore_index=True)
    p4 = p4.append({'Days':future_days,'mae Tree':mae_tree_netflix},ignore_index=True)
p1.iplot(x='Days',y='r2 Linear Regression',title='R2 for Linear Regression values (10-100 days)',xTitle='Days',yTitle='R2 Linear Regression',fill=True,colors=['green'],theme='white')
p2.iplot(x='Days',y='r2 Tree Decision',title='R2 for Tree Decision values (10-100 days)',xTitle='Days',yTitle='R2 Tree Decision',fill=True,colors=['green'],theme='white')
p3.iplot(x='Days',y='mae LR',title='MAE for Linear Regression values (10-100 days)',xTitle='Days',yTitle='MAE Linear Regression',fill=True,colors=['green'],theme='white')
p4.iplot(x='Days',y='mae Tree',title='MAE for Tree Decision values (10-100 days)',xTitle='Days',yTitle='MAE Tree Decision',fill=True,colors=['green'],theme='white')
maxval = p1.idxmax(axis=0)

"Statistical features of Netflix"
             Open        High         Low       Close   Adj Close  \
count  254.000000  254.000000  254.000000  254.000000  254.000000   
mean   371.885157  378.668819  365.765336  372.772992  372.772992   
std     76.348062   77.895003   74.679086   76.540696   76.540696   
min    255.710007  265.000000  252.279999  254.589996  254.589996   
25%    300.890007  305.680007  295.007492  299.822495  299.822495   
50%    363.539994  370.944992  356.449997  363.534988  363.534988   
75%    435.560005  441.442497  427.247490  434.425011  434.425011   
max    567.979980  575.369995  520.960022  548.729980  548.729980   

             Volume  
count  2.540000e+02  
mean   7.685473e+06  
std    4.509276e+06  
min    2.019300e+06  
25%    4.920875e+06  
50%    6.447200e+06  
75%    8.762150e+06  
max    3.825890e+07  

Netflix's data set rows,columns
(254, 7)

