<h1>This program predicts stock prices by using machine learning</h1>


In [0]:
#Import modules

import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [5]:
#Get stock data
#Download the "WIKI/FB" dataset from Quandl into df. No API key is set in the
#cells shown here, so the request relies on whatever access quandl grants by
#default
#NOTE(review): the tail printed further down ends on 2018-03-27, so this
#dataset presumably no longer receives updates -- confirm before relying on it
df = quandl.get("WIKI/FB")

#View Data
#Preview the first five rows to check which columns came back
print(df.head())

             Open   High    Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                             ...                                   
2012-05-18  42.05  45.00  38.00  ...     38.00     38.2318  573576400.0
2012-05-21  36.53  36.66  33.00  ...     33.00     34.0300  168192700.0
2012-05-22  32.61  33.59  30.94  ...     30.94     31.0000  101786600.0
2012-05-23  31.37  32.50  31.36  ...     31.36     32.0000   73600000.0
2012-05-24  32.95  33.21  31.77  ...     31.77     33.0300   50237200.0

[5 rows x 12 columns]


In [8]:
#Get the Adjusted Close Price
#Keep only the 'Adj. Close' column. The explicit .copy() gives df its own
#data, so adding the 'Prediction' column in a later cell does not trigger
#pandas' SettingWithCopyWarning
df = df[['Adj. Close']].copy()

#View data
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [32]:
#A variable for predicting 'n' days out into the future
forecast_out = 30

#Shift values up by the number of days we want to predict
#Create another column (the target or dependent variable) holding the
#'Adj. Close' value from 'forecast_out' rows later; the final 'forecast_out'
#rows have no later value, so they are filled with NaN
#assign() returns a new frame instead of writing into the existing one, which
#keeps this cell safe to re-run and avoids SettingWithCopyWarning
df = df.assign(Prediction=df['Adj. Close'].shift(-forecast_out))

#View top of new data set
print(df.head())

#View bottom of new data set
print(df.tail())

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170
            Adj. Close  Prediction
Date                              
2018-03-21      169.39         NaN
2018-03-22      164.89         NaN
2018-03-23      159.39         NaN
2018-03-26      160.06         NaN
2018-03-27      152.19         NaN


In [0]:
#Build the independent data set X: every column of df except the
#'Prediction' target, converted to a NumPy array
X = np.array(df.drop(columns=['Prediction']))

In [0]:
#Remove the last 'n' rows
#Rebuild X from df before trimming so this cell is safe to re-run: slicing
#the existing X again would strip another 'forecast_out' rows on every
#execution and leave X with a different length than y
X = np.array(df.drop(['Prediction'],axis=1))[:-forecast_out]

#Sanity check: exactly 'forecast_out' rows were removed
assert len(X) == len(df) - forecast_out, "X must have forecast_out fewer rows than df"

In [35]:
#Display the trimmed feature matrix as the cell output for a visual check
X

array([[ 38.2318],
       [ 34.03  ],
       [ 31.    ],
       ...,
       [171.5499],
       [175.98  ],
       [176.41  ]])

In [38]:
#Build the dependent data set y
#Take the 'Prediction' column as a NumPy array (NaN entries included) and
#cut off its final 'forecast_out' entries in the same expression

y = np.array(df['Prediction'])[:-forecast_out]

print(y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


<h1>Split Data into Training and Testing</h1>

In [0]:
#Split the data into 80% Training and 20% Testing
#random_state pins the shuffle so the split -- and every score and prediction
#computed from it in later cells -- is the same on each Restart & Run All
#NOTE(review): train_test_split shuffles rows by default, so rows dated after
#some test rows end up in the training set; for time-ordered prices consider
#shuffle=False (or a time-based split) for a more honest evaluation
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

<h2>Algorithm 1: Support Vector Machine</h2>


In [40]:
#Create and train a model using the SVM algorithm (here a supervised regression algorithm)

svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) #radial basis function (RBF) kernel

#Train/fit the model with the X and y training data
#As the last expression of the cell, the fitted estimator is echoed as output

svr_rbf.fit(X_train,y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [42]:
#Testing Model: score returns the coefficient of determination (R^2)
#Best possible score is 1.0, or 100%
#The value printed as 'Confidence' below is that R^2 score, not a probability

#Score using the X and y testing sets
svm_confidence = svr_rbf.score(X_test, y_test)

print('SVM Confidence: ', svm_confidence)

SVM Confidence:  0.9826058120220165


<h2>Algorithm 2: Linear Regression</h2>


In [43]:
#Create and Train Linear Regression Model

#Linear regression estimator (a regressor, not a classifier)
lr = LinearRegression()

#Train the model using the X and y training sets
#As the last expression of the cell, the fitted estimator is echoed as output
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [44]:
#Testing Model: score returns the coefficient of determination (R^2)
#Best possible score is 1.0, or 100%
#Scored on the X and y testing sets; the value printed as 'confidence' is that
#R^2 score, not a probability

lr_confidence = lr.score(X_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9781049085488984


In [46]:
#Set x_forecast to the final 'forecast_out' rows of the feature column(s):
#drop the 'Prediction' target, slice the tail by position, then convert the
#result to a NumPy array
x_forecast = np.array(df.drop(columns=['Prediction']).iloc[-forecast_out:])

print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


<h2>Linear Regression Prediction</h2>



In [47]:
#Print predictions for the next n days
#Each row of x_forecast is one of the last 'forecast_out' prices; the model
#returns one predicted value per row (the target it was trained on is the
#price 'forecast_out' rows later)
lr_prediction = lr.predict(x_forecast)

#Predicted results for next n days
print(lr_prediction)

[177.43422853 183.89896155 184.34550511 181.70683858 180.33676172
 182.26501804 183.36107952 187.72502802 189.38941769 185.86781273
 182.68111545 180.2657207  180.95583349 184.79204868 184.1628282
 188.15127416 186.76089987 189.69387921 189.21688949 186.29405886
 188.63841259 188.30350492 189.55179716 176.8354542  172.35986981
 173.61831077 169.05138792 163.46959333 164.14955739 156.16251676]


<h2>SVM Prediction</h2>


In [48]:
#Print predictions for the next n days
#Each row of x_forecast is one of the last 'forecast_out' prices; the model
#returns one predicted value per row (the target it was trained on is the
#price 'forecast_out' rows later)
svm_prediction = svr_rbf.predict(x_forecast)

#Predicted results for next n days
print(svm_prediction)

[178.21683409 181.99050994 183.37915792 175.32104852 172.96306245
 177.1627184  180.50694123 182.79730795 177.37260695 188.4904458
 178.52788756 172.96547659 173.48084614 184.98507117 182.78465788
 180.12448713 187.95091258 178.25309424 177.12939048 188.80649383
 177.85912504 179.29016552 177.77715294 178.83325138 171.06044728
 173.10560899 173.91097253 167.5684557  166.25472978 159.88409077]
