In [None]:
!pip install chart_studio

In [None]:
# Data handling (pandas / numpy) and static plotting (matplotlib)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Show matplotlib figures inside the notebook output
%matplotlib inline

# Plotly: `py` is the chart_studio plotting module, `go` provides the graph objects
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

# For offline plotting.
# NOTE(review): `plot` is imported here a second time (already imported above).
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's notebook mode before any iplot() call
init_notebook_mode(connected=True) 

In [None]:
TCS = pd.read_csv(r'../input/tcs-dataset-from-2015-to-2021/TCS.NSE.csv')


In [None]:
TCS.head()

In [None]:
print (TCS['Volume'])

In [None]:
TCS.info()

In [None]:
TCS.isnull().sum()

In [None]:
TCS['Volume'].describe()

In [None]:
# Replace missing values in each listed column with that column's mean.
# The filled columns are assigned back to the frame instead of calling
# fillna(inplace=True) on a selected column: that pattern is chained
# assignment, which pandas warns about and which no longer updates TCS
# once Copy-on-Write is active.
fill_cols = ['Volume', 'Open', 'High', 'Low', 'Close', 'Adj Close']
# mean() yields one value per column; fillna() matches them by column label.
TCS[fill_cols] = TCS[fill_cols].fillna(TCS[fill_cols].mean())

In [None]:
TCS.isnull().sum()

In [None]:
#TCS['Volume'] = TCS['Volume'].astype('int64')

In [None]:
TCS.Volume.dtypes

In [None]:
TCS['Date'] = pd.to_datetime(TCS['Date'])

In [None]:
print(f'Dataframe contains stock prices between {TCS.Date.min()} {TCS.Date.max()}') 
print(f'Total days = {(TCS.Date.max()  - TCS.Date.min()).days} days')

In [None]:
TCS.describe()

In [None]:
TCS[['Open','High','Low','Close','Adj Close']].plot(kind='box')

In [None]:
# Setting the layout for our plot
layout = go.Layout(
    title='Stock Prices of TCS',
    xaxis=dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

TCS_data = [{'x':TCS['Date'], 'y':TCS['Close']}]
plot = go.Figure(data=TCS_data, layout=layout)

In [None]:
#plot(plot) #plotting offline
iplot(plot)

In [None]:
# Building the regression model
from sklearn.model_selection import train_test_split

#For preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [None]:
# Split the data into train and test sets.
# Feature: the frame's row index as a single column; target: the Close column.
X = np.array(TCS.index)[:, np.newaxis]
Y = TCS.Close
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Feature scaling: fit a StandardScaler on the training features.
# NOTE(review): in the cells visible here the fitted scaler is never applied
# (there is no transform call) — confirm whether scaling was intended.
scaler = StandardScaler()
scaler.fit(X_train);

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create the linear model and fit it on the training split
lm = LinearRegression().fit(X_train, Y_train)
# Show the fitted estimator as the cell output
lm

In [None]:
#Plot actual and predicted values for train dataset
trace0 = go.Scatter(
    x = X_train.T[0],
    y = Y_train,
    mode = 'markers',
    name = 'Actual'
)
trace1 = go.Scatter(
    x = X_train.T[0],
    y = lm.predict(X_train).T,
    mode = 'lines',
    name = 'Predicted'
)
TCS_data = [trace0,trace1]
layout.xaxis.title.text = 'Day'
plot2 = go.Figure(data=TCS_data, layout=layout)

In [None]:
iplot(plot2)

In [None]:
# Calculate scores for model evaluation: r2 and MSE on the train and test splits.
# Predictions are computed once per split and reused for both metrics.
train_pred = lm.predict(X_train)
test_pred = lm.predict(X_test)
score_rows = [
    'Metric'.ljust(10) + 'Train'.center(20) + 'Test'.center(20),
    f"{'r2_score'.ljust(10)}{r2_score(Y_train, train_pred)}\t{r2_score(Y_test, test_pred)}",
    f"{'MSE'.ljust(10)}{mse(Y_train, train_pred)}\t{mse(Y_test, test_pred)}",
]
# Leading and trailing newlines frame the three-row report
scores = '\n' + '\n'.join(score_rows) + '\n'
print(scores)