In [2]:
pip install yfinance

Collecting yfinanceNote: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for yfinance from https://files.pythonhosted.org/packages/14/03/3c854ca3e02eedf614abba4b2e177c469bf3af58207fa30d5098c5d652fe/yfinance-0.2.37-py2.py3-none-any.whl.metadata
  Downloading yfinance-0.2.37-py2.py3-none-any.whl.metadata (11 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Obtaining dependency information for multitasking>=0.0.7 from https://files.pythonhosted.org/packages/3e/8a/bb3160e76e844db9e69a413f055818969c8acade64e1a9ac5ce9dfdcf6c1/multitasking-0.0.11-py3-none-any.whl.metadata
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.0.tar.gz (314 kB)
     ---------------------------------------- 0.0/314.6 kB ? eta -:--:--
     ---- -------------------------------- 41.0/314.6 kB 653.6 kB/s eta 0:00:01
     -------------- ----------------------- 122.9/314.6 kB 1.0

In [3]:
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta

In [4]:
# Load historical stock prices for a single ticker over a fixed date window.
ticker_symbol = "HDFCBANK.NS"

# Window bounds handed to the download call below (roughly ten years apart).
start_date = "2013-08-23"
end_date = "2023-08-22"

# Relative dates; nothing in this cell reads them after they are assigned.
today = datetime.today()
one_year_ago = today - timedelta(days=365)

data = yf.download(
    ticker_symbol,
    start=start_date,
    end=end_date,
)

[*********************100%%**********************]  1 of 1 completed


In [5]:
# Preview the first five rows of the downloaded price frame.
data.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-08-23,296.0,304.575012,294.149994,303.774994,282.453003,10349222
2013-08-26,306.174988,311.399994,301.274994,305.649994,284.196381,7303804
2013-08-27,298.649994,300.0,277.725006,280.950012,261.230072,27354186
2013-08-28,277.850006,286.0,264.0,280.975006,261.253387,18533776
2013-08-29,284.0,291.25,284.0,286.024994,265.948883,20385502


In [6]:
# Name of the column the model is trained to reproduce.
target = 'Close'

# Columns selected from the price frame for modelling. 'Close' is listed here
# as well: it is selected together with the inputs and split off afterwards.
features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    '50d_MA', '200d_MA',
]

In [7]:
# Forward-fill gaps: every NaN takes the most recent earlier value in its column
# (leading NaNs, which have no earlier value, are left as they are).
# DataFrame.ffill() is used instead of fillna(method='ffill'), whose `method`
# argument is deprecated in current pandas.
data = data.ffill()

# Feature engineering: trailing moving averages of the closing price.
# Each rolling mean is NaN until a full window of rows is available, so the
# first 49 / 199 rows of these two columns stay empty.
data['50d_MA'] = data['Close'].rolling(window=50).mean()
data['200d_MA'] = data['Close'].rolling(window=200).mean()


In [8]:
# Build the model inputs (X) and the target series (y).
# Rows with a NaN in any selected column are discarded first, then the target
# column is separated from the remaining input columns.
X = data[features].dropna()
y = X[target]
X = X.drop(columns=[target])


In [9]:
# Hold out 20% of the rows for testing. shuffle=False keeps the rows in their
# existing order, so the test split is the tail end of X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [10]:
# Build a Linear Regression model and fit it on the training split in one step
# (fit() hands back the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's displayed value.
model

In [11]:
# Make predictions on the testing set.
predictions = model.predict(X_test)

# predictions[-1] is the model's estimate of the close for the LAST row of the
# test set, computed from that same row's Open/High/Low/Volume and moving
# averages. It is not a forecast for the following trading day, so the message
# reports the date the value actually belongs to.
last_test_date = pd.Timestamp(X_test.index[-1]).date()

# Variable name kept unchanged so any later cell that reads it still works.
predicted_next_day_close = predictions[-1]
print(f"Predicted Closing Price for {last_test_date}: {predicted_next_day_close: .2f}")

Predicted Next Day's Closing Price:  1588.61


In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Score the test-set predictions against the held-out closing prices.
# NOTE(review): the inputs include the same day's Open/High/Low, so these
# numbers describe same-day fit rather than forecasting skill.
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)  # same units as the price, unlike the squared error
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

# One print call, one metric per line.
print(
    f"The R-squared score is:{r2}",
    f"The Mean Absolute Error is:{mae}",
    f"The Mean Squared Error is:{mse}",
    f"The Root mean squared error is:{rmse}",
    sep="\n",
)

The R-squared score is:0.996019587658757
The Mean Absolute Error is:5.2800743515603745
The Mean Squared Error is:48.231437233551375
The Root mean squared error is:6.9448856890197534


In [13]:
# Summary statistics for the price frame, including the two moving-average
# columns added earlier; their non-null counts are lower because a rolling
# mean is NaN until a full window of rows exists.
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,50d_MA,200d_MA
count,2464.0,2464.0,2464.0,2464.0,2464.0,2464.0,2415.0,2265.0
mean,976.362835,985.203763,966.817818,976.172576,946.090048,6694943.0,976.126903,975.545388
std,423.575344,427.235535,419.652497,423.3695,424.088152,7010876.0,414.812302,387.944548
min,277.850006,286.0,264.0,280.950012,261.230072,0.0,318.441501,344.428376
25%,546.393753,550.612488,541.368744,546.037506,515.706299,2814092.0,538.445002,568.689499
50%,986.875,998.725006,977.612518,987.612518,952.917999,4814530.0,996.211505,1022.379375
75%,1380.087494,1397.25,1364.625,1377.887451,1349.926453,8133500.0,1380.926501,1331.114751
max,1723.449951,1757.5,1713.800049,1728.199951,1728.199951,201130000.0,1646.998999,1627.627255


In [20]:
# Pull the final row of the price frame (all columns) and print it.
last_row = data.iloc[-1, :]
print(last_row)

Open         1.600500e+03
High         1.600500e+03
Low          1.587000e+03
Close        1.589500e+03
Adj Close    1.589500e+03
Volume       1.091864e+07
50d_MA       1.646234e+03
200d_MA      1.627627e+03
Name: 2023-08-21 00:00:00, dtype: float64
