In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

from datetime import datetime
import yfinance as yf 

import warnings 
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation warnings raised by the imported libraries. Consider
# narrowing it to a specific category/module once the noisy source is known.
warnings.filterwarnings('ignore')

In [2]:
# Date window for the price history, given as ISO date strings.
start_date, end_date = '2010-01-01', '2024-05-07'

# Fetch the price history for the 'TCS.NS' ticker through yfinance.
Tcs_data = yf.download(
    'TCS.NS',
    start=start_date,
    end=end_date,
)

[*********************100%%**********************]  1 of 1 completed


In [3]:
# Preview the first rows to confirm the download worked and inspect the columns.
Tcs_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,377.399994,379.450012,374.524994,375.825012,286.101837,1963682
2010-01-05,377.5,379.774994,373.274994,375.924988,286.178009,2014488
2010-01-06,375.924988,376.100006,366.5,367.424988,279.707123,3349176
2010-01-07,367.75,369.700012,355.5,357.200012,271.923279,6474892
2010-01-08,358.0,359.25,348.25,349.899994,266.365997,6048178


In [4]:
# Preview the last rows to check how far the downloaded history extends.
Tcs_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-29,3838.0,3877.800049,3819.199951,3870.199951,3870.199951,1110693
2024-04-30,3872.0,3881.75,3810.0,3820.649902,3820.649902,2203078
2024-05-02,3778.050049,3871.100098,3778.050049,3863.300049,3863.300049,2491044
2024-05-03,3890.149902,3893.600098,3797.600098,3843.399902,3843.399902,2962234
2024-05-06,3853.0,3939.949951,3853.0,3921.050049,3921.050049,2053831


In [5]:
# List the column labels of the downloaded frame.
Tcs_data.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [6]:
# Move the 'Date' index into a regular column. The step is guarded and rebinds
# the name (instead of mutating in place) so that re-running this cell does not
# push the new RangeIndex into a stray 'index' column.
if 'Date' not in Tcs_data.columns:
    Tcs_data = Tcs_data.reset_index()

In [7]:
# Show the frame after the index reset; 'Date' now appears as an ordinary column.
Tcs_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-01-04,377.399994,379.450012,374.524994,375.825012,286.101837,1963682
1,2010-01-05,377.500000,379.774994,373.274994,375.924988,286.178009,2014488
2,2010-01-06,375.924988,376.100006,366.500000,367.424988,279.707123,3349176
3,2010-01-07,367.750000,369.700012,355.500000,357.200012,271.923279,6474892
4,2010-01-08,358.000000,359.250000,348.250000,349.899994,266.365997,6048178
...,...,...,...,...,...,...,...
3532,2024-04-29,3838.000000,3877.800049,3819.199951,3870.199951,3870.199951,1110693
3533,2024-04-30,3872.000000,3881.750000,3810.000000,3820.649902,3820.649902,2203078
3534,2024-05-02,3778.050049,3871.100098,3778.050049,3863.300049,3863.300049,2491044
3535,2024-05-03,3890.149902,3893.600098,3797.600098,3843.399902,3843.399902,2962234


In [8]:
# Count missing values per column before modelling.
Tcs_data.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [9]:
# Summary statistics for each column (including 'Date', now a regular column).
Tcs_data.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
count,3537,3537.0,3537.0,3537.0,3537.0,3537.0,3537.0
mean,2017-03-02 07:40:51.908396800,1742.988174,1760.384143,1724.663794,1742.462412,1590.832332,2763794.0
min,2010-01-04 00:00:00,352.0,357.0,342.625,349.774994,266.365997,0.0
25%,2013-07-26 00:00:00,874.25,896.0,871.799988,887.25,716.313904,1688475.0
50%,2017-03-06 00:00:00,1290.5,1304.0,1277.699951,1290.025024,1095.251221,2310480.0
75%,2020-10-06 00:00:00,2608.0,2655.949951,2600.25,2622.199951,2464.966064,3259116.0
max,2024-05-06 00:00:00,4215.25,4254.75,4177.0,4219.25,4219.25,88067150.0
std,,1081.697301,1090.64703,1072.162624,1081.367516,1093.287969,2227810.0


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing

In [11]:
def prepare_data(Tcs_data, forecast_col, forcast_out, test_size):
    """Build train/test arrays for predicting a column a fixed number of rows ahead.

    The single feature is the value of ``forecast_col`` at a given row; the
    target is the value of the same column ``forcast_out`` rows later.

    Parameters
    ----------
    Tcs_data : pandas.DataFrame
        Frame containing ``forecast_col``. Rows are presumably ordered from
        oldest to newest -- verify against the caller.
    forecast_col : str
        Label of the column used both as the feature and as the shifted target.
    forcast_out : int
        Look-ahead horizon in rows. The parameter keeps its original spelling
        so existing keyword callers continue to work.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test, X_lately]`` where ``X_lately``
        holds the last ``forcast_out`` feature rows, which have no target yet.

    Raises
    ------
    ValueError
        If ``forcast_out`` is not strictly between 0 and the number of rows.
    """
    # The original body read a notebook-level global named `forecast_out`
    # instead of this function's own argument; bind the argument locally so
    # the caller's value is the one actually used.
    forecast_out = forcast_out

    n_rows = len(Tcs_data)
    if not 0 < forecast_out < n_rows:
        # A zero or negative horizon makes the slices below misalign
        # (X[:-0] is empty), and a horizon >= n_rows leaves nothing to train on.
        raise ValueError(
            f"forcast_out must be between 1 and {n_rows - 1}, got {forecast_out!r}"
        )

    # Target: the same column shifted up by the horizon; the trailing rows
    # have no future value and become NaN, so they are dropped.
    label = Tcs_data[forecast_col].shift(-forecast_out).dropna()
    y = np.array(label)

    # Feature matrix with a single column, shape (n_rows, 1).
    X = np.array(Tcs_data[[forecast_col]])
    X_lately = X[-forecast_out:]   # most recent rows, used only for forecasting
    X = X[:-forecast_out]          # rows that do have a matching target

    # NOTE(review): train_test_split shuffles by default, so test rows are
    # interleaved in time with training rows. For a time series this tends to
    # inflate the test score; consider a chronological split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=0
    )

    return [X_train, X_test, y_train, y_test, X_lately]

In [12]:
# Forecasting configuration: the column to predict, the look-ahead horizon in
# rows, and the test_size value handed to the train/test split.
forecast_col, forecast_out, test_size = 'Close', 5, 0.2

In [13]:
# Build the train/test arrays plus X_lately, the most recent rows to forecast from.
X_train, X_test, y_train, y_test, X_lately = prepare_data(Tcs_data, forecast_col, forecast_out, test_size)

In [14]:
# Fit a LinearRegression on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

In [15]:
# Score the model on the held-out split, then predict from the most recent rows.
# NOTE(review): prepare_data splits with train_test_split(random_state=0), which
# shuffles by default, so this score is not an out-of-time estimate.
score = model.score(X_test, y_test)
forecast = model.predict(X_lately)

In [16]:
# Collect the evaluation score and the forecast values, then show them.
output = {
    'test_score': score,
    'forecast_set': forecast,
}

print(output)

{'test_score': 0.9969395437474617, 'forecast_set': array([3874.51409272, 3824.98397835, 3867.61696627, 3847.72482581,
       3925.3437329 ])}
