In [49]:
import yfinance as yf
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

In [50]:
# Download the daily price history for one ticker from Yahoo Finance.
ticker = "DLF.NS"
start_date = "2023-01-01"
df = yf.download(ticker, start=start_date)

# yfinance may return two-level (field, ticker) columns even for a single
# ticker. Keep the field names it provides instead of assigning names by
# position: a positional rename silently mislabels prices if the column order
# changes and raises if the column count differs (e.g. an extra 'Adj Close').
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)
# Clear the leftover level name so it does not show up in the rendered table.
df.columns.name = None

# Drop rows with any missing field, then keep only the closing price indexed
# by date. Chaining avoids in-place mutation, so re-running the cell is safe.
df = (
    df.reset_index()
    .dropna()[['Date', 'Close']]
    .set_index('Date')
)
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-01-02,375.029816
2023-01-03,374.931183
2023-01-04,366.841644
2023-01-05,367.729492
2023-01-06,364.769928


In [51]:
def create_windowed_data(data, window_size=7):
    """Build sliding-window features and next-step targets from a sequence.

    Sample ``k`` pairs the ``window_size`` consecutive values
    ``data[k : k + window_size]`` (oldest first) with the value that
    immediately follows them, ``data[k + window_size]``.

    Parameters
    ----------
    data : pandas.Series or array-like
        Ordered observations. Converted with ``np.asarray``, so the index of a
        Series is ignored and only positional order matters.
    window_size : int, default 7
        Number of consecutive observations in each feature row.

    Returns
    -------
    X : numpy.ndarray
        For 1-D input, shape ``(n_samples, window_size)`` where
        ``n_samples = max(len(data) - window_size, 0)``.
    y : numpy.ndarray
        The ``n_samples`` targets, one per row of ``X``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(
            f"window_size must be a positive integer, got {window_size}"
        )

    values = np.asarray(data)
    # Clamp at zero so input that is too short yields empty arrays that still
    # have the right number of columns, instead of a shapeless ``(0,)`` array.
    n_samples = max(len(values) - window_size, 0)

    # Row k of the index grid is [k, k + 1, ..., k + window_size - 1]; indexing
    # with the whole grid gathers every window in a single operation.
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = values[window_idx]
    # Copy so the returned targets never alias the caller's underlying data.
    y = values[window_size:].copy()
    return X, y

# Number of consecutive rows of `df` used as features for each prediction.
window_size = 7
# Pass the variable rather than repeating the literal, so that changing
# window_size keeps the windows, the column labels and the index slice below
# in agreement with each other.
X, y = create_windowed_data(df['Close'], window_size=window_size)

# Columns run from the oldest value in the window (Day-<window_size>) to the
# most recent one (Day-1); each row is labelled with the date of its target.
feature_columns = [f'Day-{i}' for i in range(window_size, 0, -1)]
X_df = pd.DataFrame(X, columns=feature_columns, index=df.index[window_size:])

display(X_df.head())

Unnamed: 0_level_0,Day-7,Day-6,Day-5,Day-4,Day-3,Day-2,Day-1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-11,375.029816,374.931183,366.841644,367.729492,364.769928,368.913361,368.025452
2023-01-12,374.931183,366.841644,367.729492,364.769928,368.913361,368.025452,367.088257
2023-01-13,366.841644,367.729492,364.769928,368.913361,368.025452,367.088257,369.850525
2023-01-16,367.729492,364.769928,368.913361,368.025452,367.088257,369.850525,371.971558
2023-01-17,364.769928,368.913361,368.025452,367.088257,369.850525,371.971558,368.716034


In [52]:
# Folder that receives the per-baseline CSV files.
os.makedirs("baseline_data", exist_ok=True)
# Each prediction is labelled with the date of the value it tries to predict.
prediction_dates = df.index[window_size:]


def _extrapolate_next_value(windows, n_steps, make_model):
    """Fit a fresh model to every window and predict one step past its end.

    Parameters
    ----------
    windows : iterable of 1-D arrays
        Each array holds ``n_steps`` consecutive values, oldest first.
    n_steps : int
        Length of every window; the fitted time axis is ``0 .. n_steps - 1``
        and the prediction is made at position ``n_steps``.
    make_model : callable
        Zero-argument factory returning an unfitted scikit-learn estimator, so
        no fitted state is shared between windows.

    Returns
    -------
    numpy.ndarray
        One predicted value per window.
    """
    # The time axis and the query point are identical for every window, so
    # they are built once instead of on each iteration.
    steps = np.arange(n_steps).reshape(-1, 1)
    next_step = [[n_steps]]
    return np.array([
        make_model().fit(steps, window).predict(next_step).item()
        for window in windows
    ])


# Baseline 1: repeat the most recent value in the window.
y_pred_last_day = X[:, -1]

# Baseline 2: straight-line fit over the window, extrapolated one step.
y_pred_linear = _extrapolate_next_value(X, window_size, LinearRegression)

# Baseline 3: average of the window.
y_pred_mean = X.mean(axis=1)

# Baseline 4: degree-2 polynomial fit over the window, extrapolated one step.
y_pred_quadratic = _extrapolate_next_value(
    X,
    window_size,
    lambda: make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
)

In [53]:
def create_results_df(model_name, y_pred):
    """Assemble the actual-versus-predicted table for one baseline.

    Reads the notebook-level ``prediction_dates`` and ``y``; both must already
    be defined when this function is called.

    Parameters
    ----------
    model_name : str
        Label of the baseline. Accepted so call sites are self-describing; it
        is not used inside the function.
    y_pred : array-like
        Predicted values, one per entry of ``prediction_dates``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Date'``, ``'Actual'`` and ``'Predicted'``, indexed by
        ``prediction_dates`` (so the dates appear both as index and column).
    """
    table = {}
    table['Date'] = prediction_dates
    table['Actual'] = y
    table['Predicted'] = y_pred
    return pd.DataFrame(table, index=prediction_dates)

# One results table per baseline.
last_day_df = create_results_df("Last Day", y_pred_last_day)
linear_df = create_results_df("Linear", y_pred_linear)
mean_df = create_results_df("Mean", y_pred_mean)
quadratic_df = create_results_df("Quadratic", y_pred_quadratic)

# Write every table to its own CSV file inside baseline_data/.
for results, csv_path in (
    (last_day_df, "baseline_data/last_day_predictions.csv"),
    (linear_df, "baseline_data/linear_predictions.csv"),
    (mean_df, "baseline_data/mean_predictions.csv"),
    (quadratic_df, "baseline_data/quadratic_predictions.csv"),
):
    results.to_csv(csv_path)