In [34]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [35]:
df = pd.read_csv("historical_stock_prices.csv")

In [37]:
df = df.drop(df[['Unnamed: 0']], axis = 1)

In [38]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,MA_50,RSI
0,2002-03-12,22.910000,23.299999,22.910000,23.075001,19.227705,768300,HDFCBANK.NS,23.196700,0.000000
1,2002-03-13,22.924999,23.500000,22.920000,23.475000,19.561012,866550,HDFCBANK.NS,23.216200,66.666773
2,2002-03-14,23.590000,23.594999,23.225000,23.485001,19.569347,406950,HDFCBANK.NS,23.233400,67.213233
3,2002-03-15,23.495001,23.600000,23.355000,23.495001,19.577686,335430,HDFCBANK.NS,23.255400,67.742064
4,2002-03-18,23.799999,24.000000,23.209999,23.930000,19.940153,426940,HDFCBANK.NS,23.286100,81.042734
...,...,...,...,...,...,...,...,...,...,...
32465,2024-09-26,192.750000,194.199997,191.050003,192.570007,192.570007,9537810,FEDERALBNK.NS,194.435999,72.179000
32466,2024-09-27,192.000000,195.500000,191.009995,193.710007,193.710007,17024145,FEDERALBNK.NS,194.378199,71.626983
32467,2024-09-30,193.800003,197.699997,192.899994,196.729996,196.729996,12907403,FEDERALBNK.NS,194.483399,74.912279
32468,2024-10-01,198.500000,200.100006,196.000000,197.119995,197.119995,16206968,FEDERALBNK.NS,194.567199,83.190482


In [39]:
print(df.isnull().sum())

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
Ticker       0
MA_50        0
RSI          0
dtype: int64


In [40]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'MA_50', 'RSI']
df[scaled_columns] = scaler.fit_transform(df[scaled_columns])

In [41]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,MA_50,RSI
0,2002-03-12,0.010435,0.010583,0.010591,0.010550,0.009254,0.000727,HDFCBANK.NS,0.011011,0.000000
1,2002-03-13,0.010443,0.010681,0.010596,0.010748,0.009425,0.000820,HDFCBANK.NS,0.011021,0.666668
2,2002-03-14,0.010770,0.010728,0.010748,0.010753,0.009429,0.000385,HDFCBANK.NS,0.011030,0.672132
3,2002-03-15,0.010723,0.010730,0.010813,0.010758,0.009434,0.000317,HDFCBANK.NS,0.011041,0.677421
4,2002-03-18,0.010873,0.010927,0.010741,0.010973,0.009620,0.000404,HDFCBANK.NS,0.011057,0.810427
...,...,...,...,...,...,...,...,...,...,...
32465,2024-09-26,0.094043,0.094512,0.094588,0.094405,0.098507,0.009020,FEDERALBNK.NS,0.098352,0.721790
32466,2024-09-27,0.093674,0.095150,0.094568,0.094968,0.099094,0.016101,FEDERALBNK.NS,0.098322,0.716270
32467,2024-09-30,0.094560,0.096231,0.095512,0.096463,0.100649,0.012207,FEDERALBNK.NS,0.098376,0.749123
32468,2024-10-01,0.096874,0.097409,0.097061,0.096656,0.100850,0.015328,FEDERALBNK.NS,0.098419,0.831905


In [42]:
df.to_csv("standardized_historical_stock_prices.csv", index=True)

In [43]:
train_size = int(len(df) * 0.8)
train_data = df[:train_size]
test_data = df[train_size:]

In [44]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length].values)  # Use .iloc to slice DataFrame rows by index
        y.append(data.iloc[i + seq_length]['Close'])  # Predicting 'Close' price for the next day
    return np.array(X), np.array(y)

# Now try creating the sequences again
seq_length = 60  # Using the past 60 days
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)