In [1]:
# Import Libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

# SVM model library
from sklearn.svm import SVR

### Import data

In [2]:
# Read the prepared dataset from disk, then give it a fresh 0..n-1 index.
loaded_df = pd.read_csv('final_df_next_7th_day_15.csv')
final_df = loaded_df.reset_index(drop=True)

In [3]:
def calculate_metrics(y, ypred):
    """Compute standard regression error metrics.

    Parameters
    ----------
    y : array-like
        Ground-truth target values.
    ypred : array-like
        Predicted target values, aligned element-wise with ``y``.

    Returns
    -------
    tuple
        ``(MAE, MSE, RMSE, R2)`` in that order.
    """
    MAE = metrics.mean_absolute_error(y, ypred)
    MSE = metrics.mean_squared_error(y, ypred)
    # The ``squared=`` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and later removed, so RMSE is derived from the MSE
    # directly. For a single-output target this equals the old
    # ``squared=False`` result and works on every scikit-learn version.
    RMSE = np.sqrt(MSE)
    R2 = metrics.r2_score(y, ypred)
    return MAE, MSE, RMSE, R2

def plot_result(df, title=None):
    """Draw actual and predicted series from a results frame as two lines.

    Parameters
    ----------
    df : pandas.DataFrame
        Results frame; the column at position 1 is plotted as 'Actual' and
        the column at position 2 as 'Predicted'. Both are plotted against
        the frame's index.
    title : str, optional
        Figure title. When omitted, 'Actual vs. Predicted Values' is used.
    """
    plt.figure(figsize=(25, 6))
    plt.plot(df.iloc[:, 1], label='Actual')
    plt.plot(df.iloc[:, 2], label='Predicted')
    plt.xlabel('Date')
    plt.ylabel('Price')
    # Set the title once: previously a second plt.title(title) call always
    # overwrote the default heading, leaving the figure untitled when no
    # title was supplied.
    plt.title('Actual vs. Predicted Values' if title is None else title)
    plt.legend()
    plt.grid()
    plt.show()

### SVR Model

In [24]:

# The target is the 7-day-ahead closing price; every column except the target
# and 'Date' is used as a predictor.
target_col = 'next_7th_day_closing_price'
y = final_df[target_col]
X = final_df.drop(columns=[target_col, 'Date'])

# 80/20 hold-out split with a fixed seed so reruns give the same partition.
# NOTE(review): train_test_split shuffles rows by default; if the rows are in
# date order, confirm that a shuffled split is what is intended here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an RBF-kernel support vector regressor on the training partition.
svr = SVR(kernel='rbf', C=30000, gamma='scale')
svr.fit(X_train, y_train)

# Predict on both partitions so train and test errors can be compared.
ytest_pred = svr.predict(X_test)
ytrain_pred = svr.predict(X_train)

In [25]:
# Score the fitted model on both partitions with the shared helper.
MAE_train, MSE_train, RMSE_train, R2_train = calculate_metrics(y_train, ytrain_pred)
MAE_test, MSE_test, RMSE_test, R2_test = calculate_metrics(y_test, ytest_pred)

# Each entry pairs a report line's format string with the value it prints.
train_report = (
    ("Mean Absolute Error (MAE) for Training: %.5f", MAE_train),
    ("Mean Squared Error (MSE) for Training: %.5f", MSE_train),
    ("Root Mean Squared Error (RMSE) for Training: %.5f", RMSE_train),
    ("R-squared score (R2) for Training: %.5f", R2_train),
)
test_report = (
    ("Mean Absolute Error (MAE) for Testing: %.5f", MAE_test),
    ("Mean Squared Error (MSE) for Testing: %.5f", MSE_test),
    ("Root Mean Squared Error (RMSE) for Testing: %.5f", RMSE_test),
    ("R-squared score (R2) for Testing: %.5f", R2_test),
)

for line_format, score in train_report:
    print(line_format % score)
print("\n********************************************\n")
for line_format, score in test_report:
    print(line_format % score)

Mean Absolute Error (MAE) for Training: 658.07003
Mean Squared Error (MSE) for Training: 2604995.52469
Root Mean Squared Error (RMSE) for Training: 1613.99985
R-squared score (R2) for Training: 0.98743

********************************************

Mean Absolute Error (MAE) for Testing: 702.17087
Mean Squared Error (MSE) for Testing: 3496195.61584
Root Mean Squared Error (RMSE) for Testing: 1869.81165
R-squared score (R2) for Testing: 0.98464


In [34]:
# Order-preserving hold-out: the last 20% of rows become the test set and
# everything before them the training set (no shuffling).
# NOTE(review): presumably final_df is sorted by date, making this a
# chronological split; confirm against how the CSV was produced.
test_size = 0.2
n_test = int(np.round(len(final_df) * test_size))
split_at = len(final_df) - n_test
train_split = final_df[:split_at]
test_split = final_df[split_at:]

# Predictors are every column except the target and 'Date', as NumPy arrays.
non_feature_cols = ['next_7th_day_closing_price', 'Date']
x_train = train_split.drop(columns=non_feature_cols).values
x_test = test_split.drop(columns=non_feature_cols).values

# Targets as plain NumPy arrays.
y_train = train_split['next_7th_day_closing_price'].values
y_test = test_split['next_7th_day_closing_price'].values

# Linear-kernel support vector regressor; fit() returns the fitted estimator.
svr = SVR(kernel='linear', C=10000, gamma='scale').fit(x_train, y_train)

# Predict on both partitions so train and test errors can be compared.
ytrain_pred = svr.predict(x_train)
ytest_pred = svr.predict(x_test)



In [35]:
# Score the fitted model on both partitions with the shared helper.
MAE_train, MSE_train, RMSE_train, R2_train = calculate_metrics(y_train, ytrain_pred)
MAE_test, MSE_test, RMSE_test, R2_test = calculate_metrics(y_test, ytest_pred)

# Each entry pairs a report line's format string with the value it prints.
train_report = (
    ("Mean Absolute Error (MAE) for Training: %.5f", MAE_train),
    ("Mean Squared Error (MSE) for Training: %.5f", MSE_train),
    ("Root Mean Squared Error (RMSE) for Training: %.5f", RMSE_train),
    ("R-squared score (R2) for Training: %.5f", R2_train),
)
test_report = (
    ("Mean Absolute Error (MAE) for Testing: %.5f", MAE_test),
    ("Mean Squared Error (MSE) for Testing: %.5f", MSE_test),
    ("Root Mean Squared Error (RMSE) for Testing: %.5f", RMSE_test),
    ("R-squared score (R2) for Testing: %.5f", R2_test),
)

for line_format, score in train_report:
    print(line_format % score)
print("\n********************************************\n")
for line_format, score in test_report:
    print(line_format % score)

Mean Absolute Error (MAE) for Training: 231.97810
Mean Squared Error (MSE) for Training: 348322.55078
Root Mean Squared Error (RMSE) for Training: 590.18857
R-squared score (R2) for Training: 0.97633

********************************************

Mean Absolute Error (MAE) for Testing: 2654.84084
Mean Squared Error (MSE) for Testing: 14422634.94530
Root Mean Squared Error (RMSE) for Testing: 3797.71444
R-squared score (R2) for Testing: 0.92969


In [None]:
# Actual vs. predicted targets on the training partition.
fig, ax = plt.subplots()
ax.scatter(y_train, ytrain_pred)
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('SVR Train Results')
plt.show()

In [None]:

# Actual vs. predicted targets on the test partition.
fig, ax = plt.subplots()
ax.scatter(y_test, ytest_pred)
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('SVR Test Results')
plt.show()

In [None]:
# Residuals (actual minus predicted) on the training partition, in 20 bins.
errors = y_train - ytrain_pred
fig, ax = plt.subplots()
ax.hist(errors, bins=20)
ax.set_xlabel('Error')
ax.set_ylabel('Frequency')
ax.set_title('Training Error Distribution')
plt.show()

In [None]:

# Residuals (actual minus predicted) on the test partition, in 20 bins.
errors = y_test - ytest_pred
fig, ax = plt.subplots()
ax.hist(errors, bins=20)
ax.set_xlabel('Error')
ax.set_ylabel('Frequency')
ax.set_title('Testing Error Distribution')
plt.show()

In [None]:
# Results frames with columns (Date, actual, predicted), keeping the row index
# of the split they were taken from; predictions are attached positionally.
SVR_result_test_df = (
    test_split[['Date', 'next_7th_day_closing_price']]
    .rename(columns={'next_7th_day_closing_price': 'y_test'})
    .assign(y_test_pred=ytest_pred.flatten())
)

SVR_result_train_df = (
    train_split[['Date', 'next_7th_day_closing_price']]
    .rename(columns={'next_7th_day_closing_price': 'y_train'})
    .assign(y_train_pred=ytrain_pred.flatten())
)

In [None]:
# Line plot of actual vs. predicted values for the training rows.
plot_result(SVR_result_train_df, title='Training Results')

In [None]:
# Line plot of actual vs. predicted values for the held-out test rows.
# This cell previously repeated the training plot, so the test results frame
# built above was never visualised.
plot_result(SVR_result_test_df,'Testing Results')