### Stock Price Prediction Model using Linear Regression

#### Importing Modules

In [35]:

from datetime import date
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error


#### Loading the data into a DataFrame

In [36]:
#read the price history CSV into a DataFrame and preview its first rows
csv_path='datasets/stock_data.csv'
df=pd.read_csv(csv_path)
df.head()

Unnamed: 0.1,Unnamed: 0,close,low,volume,open,high,adjclose
0,2021-01-04,129.410004,126.760002,143301900,133.520004,133.610001,128.804825
1,2021-01-05,131.009995,128.429993,97664900,128.889999,131.740005,130.397324
2,2021-01-06,126.599998,126.379997,155088000,127.720001,131.050003,126.007957
3,2021-01-07,130.919998,127.860001,109578200,128.360001,131.630005,130.307755
4,2021-01-08,132.050003,130.229996,105158200,132.429993,132.630005,131.432465


#### Preparing the data

In [37]:
def prepare_data(df,forecast_col,forecast_out,test_size):
    """Build train/test arrays and the forecasting inputs for one column.

    The single feature is the value of ``forecast_col`` on a row; the label is
    the value of the same column ``forecast_out`` rows later.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``forecast_col``.
    forecast_col : str
        Name of the column used both as the feature and as the label source.
    forecast_out : int
        How many rows ahead the label lies; must be at least 1.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, Y_train, Y_test, X_lately]`` where ``X_lately`` is
        the last ``forecast_out`` feature rows (the rows with no label yet).

    Raises
    ------
    ValueError
        If ``forecast_out`` is smaller than 1.
    """
    #X[:-0] would be empty and a negative shift would pair rows with the past,
    #so reject those values up front with a clear message
    if forecast_out<1:
        raise ValueError('forecast_out must be a positive number of rows, got %r'%(forecast_out,))
    feature=df[forecast_col]
    #label for row i is the feature value forecast_out rows later;
    #the last forecast_out labels are therefore NaN
    label=feature.shift(-forecast_out)
    #2-D feature array with one column, as the estimator expects
    X_all=np.array(df[[forecast_col]])
    #most recent rows: no label available, these are the inputs to forecast from
    X_lately=X_all[-forecast_out:]
    #keep only rows where both feature and label are present; a single shared
    #mask keeps X and y aligned even if the column has gaps
    keep=np.array(feature.notna()&label.notna())
    X=X_all[keep]
    y=np.array(label)[keep]
    #hold-out split (not cross validation); random_state=0 makes it repeatable
    #NOTE(review): train_test_split shuffles by default, so test rows are
    #interleaved in time with training rows - confirm this is intended
    X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=test_size,random_state=0)
    return [X_train,X_test,Y_train,Y_test,X_lately]
    

In [38]:
#column to forecast, number of rows to look ahead, and the size of the test split
forecast_col,forecast_out,test_size='close',5,0.2
#prepare the arrays; X_lately holds the latest rows that have no label yet
(X_train,X_test,Y_train,Y_test,X_lately)=prepare_data(
    df,
    forecast_col=forecast_col,
    forecast_out=forecast_out,
    test_size=test_size,
)

In [39]:
#create the linear regression model and fit it on the training split
#(fit returns the estimator itself, so the two steps can be chained)
learner=LinearRegression().fit(X_train,Y_train)
#score of the fitted model on the held-out split
score=learner.score(X_test,Y_test)
#predictions for the most recent rows
forecast=learner.predict(X_lately)
#bundle the test score and the forecast in one dict
response={'test_score':score,'forecast_set':forecast}
print(response['forecast_set'])

[146.27329027 144.82084742 141.57863455 142.44072625 141.70044238]


#### Model Metrics

In [40]:
#print the relevant metrics for the held-out split
#predict once and reuse the result for every metric below
y_pred=learner.predict(X_test)
print('Coefficients: \n',learner.coef_)
#use a separate name for the value so the imported r2_score function
#is not overwritten and stays callable in later cells
r2=r2_score(Y_test,y_pred)
Rsquared=str((r2*100))
print('R-squared:'+Rsquared+'%')
print("Mean Squared Error:%.2f"%mean_squared_error(Y_test,y_pred))
#the "variance score" is the same R^2 value as above, shown as a fraction
#(1 is a perfect prediction)
print('Variance score:%.2f'%r2)

Coefficients: 
 [0.93705806]
R-squared:80.18383468451941%
Mean Squared Error:16.69
Variance score:0.80
