In [1]:
import quandl #used to get financial data
# https://www.quandl.com/tools/api
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR #uSED FOR SUPPORT VECTOR MACHINE -- REGRESSION
from sklearn.model_selection import train_test_split
print("[SUCCESS]")

[SUCCESS]


In [5]:
# We begin by getting our data stock -- AMAZON
amazon_df = quandl.get('WIKI/AMZN')
print(amazon_df.head())

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   
1997-05-21  19.25  19.75  16.50  17.13  1571100.0          0.0          1.0   
1997-05-22  17.25  17.38  15.75  16.75   981400.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  
1997-05-21   1.604167   1.645833  1.375000    1.427500   18853200.0  
1997-05-22   1.437500   1.

In [6]:
# Because we want to only use the closing price, we don't need to work with all the information
# rewrite our df
amazon_df = amazon_df[['Adj. Close']]
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [7]:
#With this, we can create a varaible to store our number of days to predict our stock prices, in this case lets do 15
nday = 15 

# BECAUSE WE ARE USING A PREDICTION ANALYSIS, WE CAN CREATE A NEW COLUMN TO STORE OUR TARGET PREDICTION
amazon_df['Stock Prediction'] = amazon_df[['Adj. Close']].shift(-nday)

print(amazon_df.tail())

            Adj. Close  Stock Prediction
Date                                    
2018-03-21     1581.86               NaN
2018-03-22     1544.10               NaN
2018-03-23     1495.56               NaN
2018-03-26     1555.86               NaN
2018-03-27     1497.05               NaN


In [8]:
# NOW WE CAN CREATE A NEW *INDEPENDENT* DATASET THAT IS USED TO TRAIN FOR OUR RLINEAR REGRESSION MODEL
x_data = np.array(amazon_df.drop(['Stock Prediction'], 1)) # CONVERTS DF --> NP ARRAY
x_data = x_data[:-nday]
print(x_data)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1500.25      ]
 [1523.61      ]
 [1537.64      ]]


In [9]:
# NOW WE CREATE A NEW *DEPENDENT* DATASET
y_data = np.array(amazon_df['Stock Prediction'])

# GET ALL OUR DATA EXCEPT OUR NDAY ROWS
y_data = y_data[:-nday]

print(y_data)

[1.68750000e+00 1.58333333e+00 1.54166667e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [10]:
# NOW THAT WE HAVE OUR INDEPENDENT AND DEPENDENT DATASETS, WE CAN SPLIT THE DATA TO TRAIN 
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.2)

In [11]:
# FOR THIS MODEL, WE WILL BE USING SUPPORT VECTOR MACHINE -- REGRESSION
# THIS IS A GOOD MODEL TO USE BECAUSE THE MODEL ATTEMPTS TO PREDICT A STOCK PRICE IN THE FUTURE
# AFTER DOING MORE RESEARCH, IT WAS CLEAR THAT USING RBF AS OUR KERNEL, IT WOULD PROVIDE A BETTER OUTCOME
# https://www.cs.princeton.edu/sites/default/files/uploads/saahil_madge.pdf
svr_machine = SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
svr_machine.fit(x_train, y_train)

SVR(C=1000.0, gamma=0.1)

In [12]:
# NOW WE CAN GET THE SCORE OF THE PREDICTION
SVM_score = svr_machine.score(x_test, y_test)
print(f"SVM Confidence: {SVM_score}")

SVM Confidence: 0.9325720241143853


In [13]:
#NOW WE CAN CREATE THE LINEAR REGRESSION MODEL
lnr = LinearRegression()

lnr.fit(x_train, y_train)

LinearRegression()

In [15]:
# GATHER MODEL'S SCORE
lnr_confidence = lnr.score(x_test, y_test)
print(f"Linear Confidence: {lnr_confidence}")

Linear Confidence: 0.9944187392466749


In [16]:
#WITH BOTH MODELS DONE, WE CAN NOW CREATE OUR PREDICTIONS
pred = np.array(amazon_df.drop(['Stock Prediction'], 1))[-nday:]
print(pred)

[[1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [26]:
# NOW PRINT THE STOCK PRICES FOR BOTH MODELS

#LINEAR REGRESSION PREDICTION
lnr_prediction = lnr.predict(pred)
print('Linear Regression Model Prediction Prices -- 15 days out')
for i in lnr_prediction:
    print(f"Stock Price: ${i}")
print()

# SUPPORT VECTOR MACHINE PREDICTION
SVM_prediction = svr_machine.predict(pred)
print('Support Vector Machine (Regression) Prediction Prices -- 15 days out')
for i in SVM_prediction:
    print(f"Stock Price: ${i}")

Linear Regression Model Prediction Prices -- 15 days out
Stock Price: $1590.4179367214256
Stock Price: $1597.4865535748268
Stock Price: $1625.3385526342195
Stock Price: $1645.4315597247914
Stock Price: $1634.911067294292
Stock Price: $1637.8168252427745
Stock Price: $1628.87286106092
Stock Price: $1617.9092920638082
Stock Price: $1590.3458079780235
Stock Price: $1633.1902815588428
Stock Price: $1628.3988721757064
Stock Price: $1589.4905671633987
Stock Price: $1539.4744356671754
Stock Price: $1601.6081960549436
Stock Price: $1541.009747491019

Support Vector Machine (Regression) Prediction Prices -- 15 days out
Stock Price: $680.0930486872135
Stock Price: $676.4495139977995
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $680.4865239828938
Stock Price: $676.4495126417371
Stock Price: $676.4495126417371
Stock Price: $689.088100358027

**RESOURCCES THAT HELPED ME**

1. https://www.analyticsvidhya.com/blog/2017/09/understaing-support-vector-machine-example-code/
2. https://towardsdatascience.com/introduction-to-linear-regression-in-python-c12a072bedf0
3. https://www.quora.com/How-do-I-select-SVM-kernels
4. https://www.cs.princeton.edu/sites/default/files/uploads/saahil_madge.pdf
5. https://journalofbigdata.springeropen.com/articles/10.1186/s40537-020-00333-6
6. https://www.researchgate.net/post/Diffference_between_SVM_Linear_polynmial_and_RBF_kernel

