In [None]:
import datetime as dt
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import talib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR


In [None]:
# Location of the Infosys price-history CSV. A raw string keeps every backslash
# of the Windows path literal; the previous literal only worked because the
# invalid escape sequence "\D" happened to be passed through unchanged.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
DATA_PATH = r"C:\Users\tcgra\Documents\LJMU_Code\Datasets\infosys_stock.csv"

df = pd.read_csv(DATA_PATH)

# info() prints its own summary; head() is left as the final expression so the
# notebook actually renders it (only a cell's last expression is displayed).
df.info()
df.head(5)

In [None]:
# Keep only the OHLC price columns. .copy() makes the result an independent
# frame, so adding the 'Prediction' column in the next cell does not assign
# into a slice of the loaded data (which raises SettingWithCopyWarning).
df = df[['Open', 'High', 'Low', 'Close']].copy()

In [None]:
# Forecast horizon: the target is the close price this many rows ahead
# (rows are presumably trading days - confirm against the CSV).
forecast_out = 100

# Target column: 'Close' shifted up by `forecast_out` rows, so each row is
# paired with the close observed `forecast_out` rows later. assign() returns a
# new frame instead of writing a column into a possible slice of the raw data.
df = df.assign(Prediction=df['Close'].shift(-forecast_out))

# The final `forecast_out` rows have no future close (NaN target); drop them.
df = df.dropna()

# Show the end of the resulting data set.
df.tail()

In [None]:
## Create the independent data set (X) used to train the model:
#  every column except 'Prediction', converted to a NumPy array.
#  `columns=` is used because passing the axis positionally (drop([...], 1))
#  was deprecated in pandas 1.3 and is rejected by pandas 2.x.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last `forecast_out` rows so X stays aligned with y, which is
# trimmed the same way in the next cell.
# NOTE(review): df.dropna() in the previous cell already removed the rows whose
# target was NaN, so this slice discards a further `forecast_out` valid rows.
X = X[:-forecast_out]
print(X)

In [None]:
### Create the dependent data set (y) ###
# Target vector: the 'Prediction' column as a NumPy array with its final
# `forecast_out` entries removed (the same trimming that is applied to X).
y = np.array(df['Prediction'])[:-forecast_out]
print(y)

Feature scaling

In [None]:
# Standardise the feature matrix with a StandardScaler; the fitted scaler is
# kept in `scaled` so it can be reused later.
# NOTE(review): the scaler is fitted on all of X before the train/test split
# below, so the test rows contribute to the scaling statistics.
scaled = StandardScaler()
X = scaled.fit_transform(X)

Train Test Split

In [None]:
# Hold out 25% of the samples for evaluation. random_state pins the shuffle so
# every metric and plot below is reproducible across kernel restarts.
# NOTE(review): train_test_split shuffles rows by default, so the test set is
# not a chronological tail of the series - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

## Support Vector Machine Regression

Perform a grid search to find the best hyperparameters for SVM regression.

In [None]:
#  parameters = {'kernel':('sigmoid',''), 'C':[100,1000,10000,50000,100000],'degree':[1,2],'epsilon':[0.1,1,2,4,5],'tol': [0.1,0.5]}
#  sv = SVR(gamma = 'auto')
#  grid_search = GridSearchCV(sv, parameters, verbose = 3 ,cv = 3)

#  grid_search.fit(X_train,y_train)

#  grid_search.best_estimator_

#SVR(C=50000, cache_size=200, coef0=0.0, degree=1, epsilon=2, gamma='auto',kernel='rbf', max_iter=-1, shrinking=True, tol=0.1, verbose=False)

# SVR(C=100, cache_size=200, coef0=0.0, degree=1, epsilon=5, gamma='auto',
#     kernel='sigmoid', max_iter=-1, shrinking=True, tol=0.1, verbose=False)

## Testing

In [None]:
# Support vector regressor evaluated in the cells below.
# NOTE(review): despite the `svr_poly` name, the kernel configured here is
# 'rbf' - confirm which kernel was intended.
svr_poly = SVR(
    kernel='rbf',
    C=50000,
    epsilon=2,
    gamma='auto',
    degree=1,
    coef0=0.0,
    tol=0.1,
    shrinking=True,
    cache_size=200,
    max_iter=-1,
    verbose=False,
)

In [None]:
# Train the `svr_poly` regressor on the training split.
svr_poly.fit(X_train, y_train)

In [None]:
# Predictions of `svr_poly` for the held-out test features.
svm_poly_predict = svr_poly.predict(X_test)

In [None]:
# Error metrics for the `svr_poly` predictions on the held-out set.
# Arguments follow scikit-learn's documented (y_true, y_pred) order.
mae = mean_absolute_error(y_test, svm_poly_predict)

# RMSE is taken as the square root of the MSE: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it no longer works on
# current releases.
rmse = np.sqrt(mean_squared_error(y_test, svm_poly_predict))

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Each absolute error is divided by the corresponding ``y_true`` entry, so
    the argument order matters and ``y_true`` must not contain zeros.

    Parameters:
        y_true: array-like of reference (actual) values.
        y_pred: array-like of predicted values, same shape as ``y_true``.

    Returns:
        mean(|y_true - y_pred| / |y_true|) * 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).mean() * 100
# MAPE divides each error by its first argument, so the actual values (y_test)
# must come first; passing the predictions first normalised the error by the
# model output instead of the true price.
mape = mean_absolute_percentage_error(y_test, svm_poly_predict)

print('The mae is {} , rmse is {} and mape is {}.'.format(mae,rmse, mape))


In [None]:
# confidence = svr_poly.score(X_test, y_test)
# print('accuracy:',confidence)

In [None]:
# Coefficient of determination for the `svr_poly` predictions.
# r2_score(y_true, y_pred) is not symmetric: the true values must be passed
# first, otherwise the variance of the predictions is used as the baseline
# and a different number is reported.
accuracy = r2_score(y_test, svm_poly_predict)
print("r2-score:", accuracy)


In [None]:
# Mean absolute error of the `svr_poly` predictions (same quantity as `mae`
# computed earlier). mean_absolute_error is already imported at the top of the
# notebook, so it is not re-imported here; arguments use the documented
# (y_true, y_pred) order.
# NOTE(review): `accuracy` holds an error value here, not an accuracy.
accuracy = mean_absolute_error(y_test, svm_poly_predict)
print("mae score:", accuracy)

In [None]:
# Predicted vs. actual target for the first 50 test samples of `svr_poly`.
# Both series use the same [0:50] window; previously the actual values were
# sliced [0:100], so the two curves covered different ranges.
# NOTE(review): the title says "Poly Kernel" but `svr_poly` is built with
# kernel='rbf' - confirm which is intended.
fig, ax = plt.subplots(figsize=(30, 15), dpi=80, facecolor='w', edgecolor='k')
ax.plot(svm_poly_predict[0:50], label="Predicted values")
ax.plot(y_test[0:50], label="Actual values")
ax.set_xlabel('time in day', fontsize='xx-large')
ax.set_ylabel('Stock prices in INR', fontsize='xx-large')
ax.set_title('SVM - Poly Kernel - 100 days', fontsize='xx-large')
ax.legend(fontsize='xx-large')
plt.show()

In [None]:
# RBF-kernel support vector regressor: configure, train on the training split,
# then predict the held-out test features.
svr_rbf = SVR(
    kernel='rbf',
    C=30000,
    epsilon=5,
    gamma='auto',
    degree=1,
    coef0=0.0,
    tol=0.1,
    shrinking=True,
    cache_size=200,
    max_iter=-1,
    verbose=False,
)
svr_rbf.fit(X_train, y_train)
svm_rbf_predict = svr_rbf.predict(X_test)

In [None]:
# Error metrics for the `svr_rbf` predictions on the held-out set.
# Arguments follow scikit-learn's documented (y_true, y_pred) order.
mae = mean_absolute_error(y_test, svm_rbf_predict)

# RMSE is taken as the square root of the MSE: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it no longer works on
# current releases.
rmse = np.sqrt(mean_squared_error(y_test, svm_rbf_predict))

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Same definition as the one given earlier in the notebook, repeated so this
    cell can run on its own. Each absolute error is divided by the
    corresponding ``y_true`` entry, so ``y_true`` must not contain zeros.

    Parameters:
        y_true: array-like of reference (actual) values.
        y_pred: array-like of predicted values, same shape as ``y_true``.

    Returns:
        mean(|y_true - y_pred| / |y_true|) * 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).mean() * 100
# MAPE divides each error by its first argument, so the actual values (y_test)
# must come first; passing the predictions first normalised the error by the
# model output instead of the true price.
mape = mean_absolute_percentage_error(y_test, svm_rbf_predict)

print('The mae is {} , rmse is {} and mape is {}.'.format(mae,rmse, mape))

In [None]:
# Score `svr_rbf` on the held-out split with the estimator's own score()
# method. For scikit-learn regressors score() reports the coefficient of
# determination (R^2), so the printed "accuracy" label is an R^2 value.
confidence = svr_rbf.score(X_test, y_test)
print('accuracy:',confidence)

In [None]:
# Coefficient of determination for the `svr_rbf` predictions.
# r2_score(y_true, y_pred) is not symmetric: the true values must be passed
# first, otherwise the variance of the predictions is used as the baseline
# and the result disagrees with svr_rbf.score(X_test, y_test).
accuracy = r2_score(y_test, svm_rbf_predict)
print("r2-score:", accuracy)

In [None]:
# Predicted vs. actual target for the first 50 test samples of `svr_rbf`,
# drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(30, 15), dpi=80, facecolor='w', edgecolor='k')
ax.plot(svm_rbf_predict[0:50], label="Predicted values")
ax.plot(y_test[0:50], label="Actual values")
ax.set_xlabel('time in day', fontsize='xx-large')
ax.set_ylabel('Stock prices in INR', fontsize='xx-large')
ax.set_title('SVM - RBF Kernel - 100 days to Future', fontsize='xx-large')
ax.legend(fontsize='xx-large')
plt.show()

In [None]:
# Linear-kernel support vector regressor: configure, train on the training
# split, then predict the held-out test features.
svr_linear = SVR(
    kernel='linear',
    C=30000,
    epsilon=5,
    gamma='auto',
    degree=1,
    coef0=0.0,
    tol=0.1,
    shrinking=True,
    cache_size=200,
    max_iter=-1,
    verbose=False,
)
svr_linear.fit(X_train, y_train)
svm_linear_predict = svr_linear.predict(X_test)

In [None]:
# Error metrics for the `svr_linear` predictions on the held-out set.
# Arguments follow scikit-learn's documented (y_true, y_pred) order.
mae = mean_absolute_error(y_test, svm_linear_predict)

# RMSE is taken as the square root of the MSE: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it no longer works on
# current releases.
rmse = np.sqrt(mean_squared_error(y_test, svm_linear_predict))

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Same definition as the one given earlier in the notebook, repeated so this
    cell can run on its own. Each absolute error is divided by the
    corresponding ``y_true`` entry, so ``y_true`` must not contain zeros.

    Parameters:
        y_true: array-like of reference (actual) values.
        y_pred: array-like of predicted values, same shape as ``y_true``.

    Returns:
        mean(|y_true - y_pred| / |y_true|) * 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).mean() * 100
# MAPE divides each error by its first argument, so the actual values (y_test)
# must come first; passing the predictions first normalised the error by the
# model output instead of the true price.
mape = mean_absolute_percentage_error(y_test, svm_linear_predict)

print('The mae is {} , rmse is {} and mape is {}.'.format(mae,rmse, mape))

In [None]:
# Score `svr_linear` on the held-out split with the estimator's own score()
# method. For scikit-learn regressors score() reports the coefficient of
# determination (R^2), so the printed "accuracy" label is an R^2 value.
confidence = svr_linear.score(X_test, y_test)
print('accuracy:',confidence)
    

In [None]:
# Coefficient of determination for the `svr_linear` predictions.
# r2_score(y_true, y_pred) is not symmetric: the true values must be passed
# first, otherwise the variance of the predictions is used as the baseline
# and the result disagrees with svr_linear.score(X_test, y_test).
accuracy = r2_score(y_test, svm_linear_predict)
print("r2-score:", accuracy)

In [None]:
# Predicted vs. actual target for the first 50 test samples of `svr_linear`.
# The title now names the linear kernel: the model is built with
# kernel='linear', and the previous "RBF Linear" wording was a copy-paste slip.
fig, ax = plt.subplots(figsize=(30, 15), dpi=80, facecolor='w', edgecolor='k')
ax.plot(svm_linear_predict[0:50], label="Predicted values")
ax.plot(y_test[0:50], label="Actual values")
ax.set_xlabel('time in day', fontsize='xx-large')
ax.set_ylabel('Stock prices in INR', fontsize='xx-large')
ax.set_title('SVM - Linear Kernel - 100 days', fontsize='xx-large')
ax.legend(fontsize='xx-large')
plt.show()