In [82]:
# This program predict stock
## Install quandl
!pip install quandl

Defaulting to user installation because normal site-packages is not writeable


### Import the dependencies
- This program predicts stock prices using machine learning models.

In [83]:
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

### Get the stock data

In [84]:
# Download the "WIKI/FB" dataset from Quandl. The printed preview shows a
# Date-indexed frame holding both raw and adjusted price/volume columns.
df = quandl.get("WIKI/FB")
# Sanity-check the download by showing its first five rows
print(df.head(n=5))

             Open   High    Low    Close       Volume  Ex-Dividend  \
Date                                                                 
2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

            Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  \
Date                                                                  
2012-05-18          1.0      42.05      45.00     38.00     38.2318   
2012-05-21          1.0      36.53      36.66     33.00     34.0300   
2012-05-22          1.0      32.61      33.59     30.94     31.0000   
2012-05-23          1.0      31.37      32.50     31.36     32.0000   
2012-05-24          1.0      32.95      33.21     31.77     33.0300   

           

### Get the Adjusted Close Price

In [85]:
# Keep only the adjusted closing price. The explicit .copy() turns the
# selection into an independent frame, so adding a column to it later does
# not raise pandas' SettingWithCopyWarning (emitted when writing to a slice
# of another DataFrame).
df = df[["Adj. Close"]].copy()
# Take a look at the new data
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


### A variable for predicting 'n' days out into the future.

In [86]:
# Forecast horizon: how many rows ahead of each observation the target lies
forecast_out = 30
# Target (dependent variable): the adjusted close found 'forecast_out' rows
# later, obtained by shifting the column upwards. The final 'forecast_out'
# rows have no future value to pull in and therefore hold NaN.
df['Prediction'] = df['Adj. Close'].shift(periods=-forecast_out)
# Preview the frame together with its new column
print(df.head())

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170


### Create the independent data set -> X

In [87]:
# Feature matrix: every column except the target, as a 2-D numpy array.
# The trailing 'forecast_out' rows are cut off in the same expression
# (those are the rows whose shifted target is NaN).
X = np.array(df.drop(columns=['Prediction']))[:-forecast_out]
print(X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


### Create the dependent data set -> y

In [88]:
# Target vector: the 'Prediction' column without its last 'forecast_out'
# entries (the NaN tail produced by the shift), converted to a numpy array
y = np.array(df['Prediction'].iloc[:-forecast_out])

print(y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


### Split the data 80:20 into training and testing sets

In [89]:
# Hold out 20% of the rows for testing. Fixing random_state makes the shuffle,
# and therefore every score and prediction reported further down,
# reproducible across kernel restarts.
# NOTE(review): the rows are time-ordered and train_test_split shuffles by
# default, so test rows end up interleaved with training rows; the scores may
# be optimistic for genuine forecasting — consider shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### Create and train the Support Vector Machine (Regressor)

In [90]:
# Support vector regressor with a radial basis function (RBF) kernel
svr_rbf = SVR(
    kernel='rbf',
    C=1000.0,
    gamma=0.1,
)
# Fit on the training split
svr_rbf.fit(X_train, y_train)

### Testing our model
`score` returns the coefficient of determination ($R^2$) of the prediction.

The best possible score is 1.0.

In [91]:
# R^2 of the fitted SVR, measured on the held-out test split
svm_confidence = svr_rbf.score(
    X_test,
    y_test,
)
print("svm_confidence: ", svm_confidence)

svm_confidence:  0.9835510378785177


### Linear Regression Model

In [92]:
# Ordinary least-squares linear model, using scikit-learn's default settings
lr = LinearRegression()

# Fit it on the same training split that the SVR was fitted on
lr.fit(
    X_train,
    y_train,
)

### Test the LM Model
`score` returns the coefficient of determination ($R^2$) of the prediction.

The best possible score is 1.0.

In [93]:
# R^2 of the fitted linear model, measured on the held-out test split
lr_confidence = lr.score(
    X_test,
    y_test,
)
print("lr_confidence: ", lr_confidence)

lr_confidence:  0.9802718315853838


### Set x_forecast to the last 30 rows (roughly one month) of the Adj. Close column

In [94]:
# Model inputs for the forecast: the final 'forecast_out' rows of the
# feature column(s), i.e. the rows left out of X because they lack a target
x_forecast = np.array(df.drop(columns='Prediction'))[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [95]:
# Run both fitted models on the most recent 'n' rows to forecast the
# following 'n' days
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)

# Linear regression forecast first, then the support vector machine forecast
print(lr_prediction)
print(svm_prediction)

[177.26861269 183.72400002 184.16989804 181.53504607 180.16694985
 182.0924186  183.18689558 187.54453537 189.20651892 185.69000495
 182.50791449 180.09601153 180.78512666 184.61579607 183.98748521
 187.97016531 186.581801   189.51054031 189.03424014 186.11563488
 188.45659952 188.122176   189.36866366 176.67070397 172.20158967
 173.45821138 168.89789066 163.32416533 164.00314642 156.02765218]
[174.52982398 181.39939439 181.41508903 175.22989004 175.03196251
 177.31137641 180.98695084 186.09614991 179.47606925 185.13194679
 179.04937166 175.14443336 174.30095956 181.723367   181.40462852
 183.9642271  187.92189104 179.50640898 179.68349252 186.8207875
 181.51652356 183.1605588  179.43127469 175.49892779 172.70119659
 172.83005945 172.23986227 167.5710313  166.26077097 163.22131602]
