<h3>Finding the best polynomial degree for polynomial regression</h3>

In [2]:
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [3]:
# for fetching data
def fetch_stock_data(ticker, start_date, end_date):
    """Fetch historical stock data for ``ticker`` from Yahoo Finance.

    The request goes through ``yf.download`` with ``start_date`` and
    ``end_date`` as the range bounds. Nothing is saved to disk: the data is
    retrieved from Yahoo Finance's servers and loaded into memory as a
    pandas DataFrame, which is returned to the caller.
    """
    return yf.download(ticker, start=start_date, end=end_date)

In [4]:
# for data cleaning
# Check for and handle missing values
# Feature selection: Use only 'Close' price as the target variable
def preprocess_data(data):
    """Return a cleaned copy of ``data`` holding only the 'Close' column.

    Rows with a missing value in *any* column are dropped first, then the
    'Close' column is selected as the target variable.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw price data; must contain a 'Close' column.

    Returns
    -------
    pandas.DataFrame
        A new single-column ('Close') frame. The input frame is left
        untouched, so the cell can be re-run and the raw data reused.
    """
    # dropna() without inplace=True builds a new frame instead of mutating
    # the caller's DataFrame.
    cleaned = data.dropna()
    return cleaned[['Close']]

In [5]:
# splitting into training and testing datasets
def split_data(data, test_size=0.2):
    """Split the series into train and test parts.

    The feature is the row position (0, 1, 2, ...) as a column vector and the
    target is the 'Close' column reshaped to a column vector. The split is
    delegated to ``train_test_split`` with a fixed ``random_state`` of 42 so
    repeated calls give the same partition.

    Returns the tuple ``(X_train, X_test, y_train, y_test)``.
    """
    row_positions = np.arange(len(data)).reshape(-1, 1)
    close_prices = data['Close'].values.reshape(-1, 1)

    parts = train_test_split(
        row_positions,
        close_prices,
        test_size=test_size,
        random_state=42,
    )
    # train_test_split hands back a list; callers get a 4-tuple.
    return tuple(parts)

In [6]:
# model training on cleaned data
def train_polynomial_regression(data, degree):
    """Fit a polynomial regression of 'Close' on the row position.

    Every row of ``data`` is used for fitting. The row position
    (0, 1, 2, ...) is expanded with ``PolynomialFeatures(degree=degree)`` and
    a ``LinearRegression`` is fitted on the expanded features.

    Returns the fitted ``LinearRegression``. Inputs passed to its ``predict``
    must be expanded with ``PolynomialFeatures`` of the same degree.
    """
    row_positions = np.arange(len(data)).reshape(-1, 1)
    close_prices = data['Close'].values.reshape(-1, 1)

    expanded = PolynomialFeatures(degree=degree).fit_transform(row_positions)

    # LinearRegression.fit returns the estimator itself.
    return LinearRegression().fit(expanded, close_prices)

In [7]:
# try every polynomial degree from 1 to max_degree and keep the one with the lowest validation MSE
def find_best_degree(data, max_degree):
    """Pick the polynomial degree with the lowest validation MSE.

    The data is split once with ``split_data``. For each degree from 1 to
    ``max_degree`` (inclusive) a polynomial regression is fitted on the
    training part only and scored on the held-out validation part.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column, as expected by ``split_data``.
    max_degree : int
        Highest degree to try.

    Returns
    -------
    int
        The degree with the smallest validation mean squared error. If
        ``max_degree`` is below 1 no model is evaluated and 1 is returned.
    """
    X_train, X_val, y_train, y_val = split_data(data)

    best_degree = 1
    best_mse = float('inf')

    for degree in range(1, max_degree + 1):
        # Fit on the training rows only. Fitting on the full dataset would
        # let the model see the validation rows and turn the score below into
        # an in-sample error that favours needlessly high degrees.
        polynomial_features = PolynomialFeatures(degree=degree)
        X_train_poly = polynomial_features.fit_transform(X_train)

        model = LinearRegression()
        model.fit(X_train_poly, y_train)

        # Reuse the transformer fitted on the training data for validation.
        X_val_poly = polynomial_features.transform(X_val)
        y_pred = model.predict(X_val_poly)
        mse = mean_squared_error(y_val, y_pred)

        # Strict '<' keeps the lowest degree when two degrees tie.
        if mse < best_mse:
            best_mse = mse
            best_degree = degree

    return best_degree

In [8]:
# basically the main function
if __name__ == "__main__":
    # Run configuration: ticker symbol, the date range passed to the
    # downloader, and the highest polynomial degree to evaluate.
    ticker = "GOOGL"
    start_date_training = "2017-01-01"
    end_date_training = "2022-12-31"
    max_degree = 10

    # Fetch historical stock data from Yahoo Finance for training
    stock_data = fetch_stock_data(ticker, start_date_training, end_date_training)

    # Preprocess the data: drop rows with missing values, keep only 'Close'
    preprocessed_data = preprocess_data(stock_data)

    # Find the best degree for polynomial regression among 1..max_degree
    best_degree = find_best_degree(preprocessed_data, max_degree)
    print(f"Best degree for polynomial regression: {best_degree}")

[*********************100%***********************]  1 of 1 completed
Best degree for polynomial regression: 8
