In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

#import chart_studio.plotly as py
import plotly.graph_objs as go 
from plotly import plot

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly into offline notebook mode so the `iplot` calls below can render figures inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it
# in the notebook - confirm against the plotly.offline documentation.
init_notebook_mode(connected=True)

In [57]:
# Load the VN-Index history, parsing the Date column as datetimes on read.
stock = pd.read_csv(
    'https://raw.githubusercontent.com/grassnhi/AI_ML_DL/main/STOCK_Prediction/Datasets/Train/VNI.csv',
    parse_dates=['Date'],
)

# Price is handled as text here: strip the "," separators before converting to numbers.
price_text = stock["Price"].map(lambda raw: raw.replace(",", ""))
stock["Price"] = pd.to_numeric(price_text)

# Flip the row order so the frame reads oldest -> newest.
# The original index labels are kept, so they now count down (129, 128, ...).
stock = stock.iloc[::-1]
stock.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
129,2021-07-01,1417.08,1412.15,1417.27,1402.18,753.56K,0.61%
128,2021-07-02,1420.27,1422.89,1424.28,1415.82,706.83K,0.23%
127,2021-07-05,1411.13,1420.27,1421.52,1394.12,774.45K,-0.64%
126,2021-07-06,1354.79,1411.13,1418.99,1354.79,773.51K,-3.99%
125,2021-07-07,1388.55,1354.79,1388.55,1334.58,733.80K,2.49%


In [58]:
# Make sure Date is datetime-typed so min/max and subtraction operate on dates.
stock['Date'] = pd.to_datetime(stock['Date'])

# Compute the covered period once and report it.
first_day, last_day = stock['Date'].min(), stock['Date'].max()
print(f'Dataframe start at {first_day} and end at {last_day}')
print(f'Total day: {(last_day - first_day).days} days')

Dataframe start at 2021-07-01 00:00:00 and end at 2021-12-31 00:00:00
Total day: 183 days


In [59]:
# Shared chart layout: a figure title plus styled axis titles.
# Axis titles use the nested `title=dict(text=..., font=...)` form. The legacy
# `titlefont` shortcut is deprecated and rejected by recent plotly releases,
# while the nested form is what `layout.xaxis.title.text` (used further down) addresses.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f',
)
layout = go.Layout(
    title=dict(text='Stock Price'),
    xaxis=dict(title=dict(text='Date', font=axis_title_font)),
    yaxis=dict(title=dict(text='Price', font=axis_title_font)),
)

# Single trace: Price plotted against Date.
stock_data = [{'x': stock['Date'], 'y': stock['Price']}]

# NOTE: this name shadows the `plot` function imported from plotly.offline.
# It is kept because the following cell passes `plot` to `iplot`.
plot = go.Figure(data=stock_data, layout=layout)

In [60]:
# Render the Price-vs-Date figure built in the previous cell.
# NOTE: `plot` is the go.Figure assigned above, not the `plot` function imported from plotly.offline.
iplot(plot)

In [61]:
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_absolute_error as mae 
from sklearn.metrics import r2_score

In [62]:
# Feature: chronological day number (0 = earliest row in the frame).
# `stock` was reversed with iloc[::-1] without resetting its index, so the index
# labels count DOWN as the dates go up. Using them as the feature makes the "Day"
# axis run backwards in time, so the row position is used instead.
X = np.arange(len(stock)).reshape(-1, 1)
# Target: the numeric Price column.
Y = stock['Price']

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the split is random, so held-out days are interleaved with training
# days and the test score reflects interpolation rather than forecasting. Consider
# shuffle=False if the goal is to predict future prices.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=101)

In [63]:
# Feature scaling: learn the mean/variance of the training feature only.
# NOTE: the fitted scaler is not applied to X_train/X_test in the cells visible here.
scaler = StandardScaler()
scaler.fit(X_train)

In [64]:
from sklearn.linear_model import LinearRegression

# Fit a straight line of Price against the day feature, using the training split only.
lm = LinearRegression().fit(X_train, Y_train)
lm  # leave the fitted estimator as the cell's displayed value

In [65]:
# Compare the fitted line with the observed training prices.
train_days = X_train[:, 0]        # flatten the (n, 1) feature column for plotting
train_fit = lm.predict(X_train)   # model output for those same rows

trace0 = go.Scatter(x=train_days, y=Y_train, mode='markers', name='Actual')
trace1 = go.Scatter(x=train_days, y=train_fit, mode='lines', name='Predicted')
stock_data = [trace0, trace1]

# Reuse the shared layout, relabelling the x axis because it now shows the day feature, not a date.
layout.xaxis.title.text = 'Day'
plot2 = go.Figure(data=stock_data, layout=layout)

iplot(plot2)

In [67]:
# Score the fitted model on both splits.
# Each split is predicted once and reused for every metric.
train_pred = lm.predict(X_train)
test_pred = lm.predict(X_test)

metric_rows = [
    ('r2_score', r2_score(Y_train, train_pred), r2_score(Y_test, test_pred)),
    ('MAE', mae(Y_train, train_pred), mae(Y_test, test_pred)),
]

# Values are centred in the same 20-character columns as the header so the table
# lines up; previously they were tab-separated at full precision and drifted.
header = f"{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}"
body = [
    f"{name.ljust(10)}{train_score:^20.4f}{test_score:^20.4f}"
    for name, train_score, test_score in metric_rows
]

# Leading/trailing empty items keep the blank line before and after the table.
scores = '\n'.join(['', header, *body, ''])
print(scores)


Metric           Train                Test        
r2_score  0.7613889010168328	0.4806849552848266
MAE       25.356926576980666	40.78263911389791

