In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from ydata_profiling import ProfileReport

In [None]:
# Load the raw stock records from the datasets directory
stocks = pd.read_csv('../datasets/stock_data.csv')

# Parse Date into datetimes, then derive numeric calendar features from it
stocks['Date'] = pd.to_datetime(stocks['Date'])
date_parts = stocks['Date'].dt
stocks = stocks.assign(
    Year=date_parts.year,
    Month=date_parts.month,
    Day=date_parts.day,
    DayOfWeek=date_parts.dayofweek,
)

# Target is Close; features are every other column except the raw Date
Y = stocks['Close']
X = stocks.drop(columns=['Close', 'Date'])

# Optional profiling report (disabled; uncomment to write the HTML report):
# ProfileReport(stocks, title="Stock Data Profiling Report", explorative=True).to_file(
#     output_file='stock-data-profiling-report.html')
stocks

In [None]:
# Preprocess features and split, without train/test leakage.
#
# The split is performed BEFORE any transformer is fitted, and the transformers
# are fitted on the training rows only; the held-out rows are only transformed.
# Fitting StandardScaler / OneHotEncoder on the full dataset would let test-set
# statistics and categories shape the training features.
#
# X itself is left untouched, so this cell can be re-run safely.
numeric_features = ['Open', 'High', 'Low', 'Volume', 'Market_Cap', 'PE_Ratio', 'Dividend_Yield', 'Volatility', 'Sentiment_Score']
categorical_features = ['Company', 'Sector', 'Trend']

# Train-Test Split on the raw (untransformed) features.
# NOTE(review): the rows come from dated records; a random split can mix past
# and future observations. Consider a chronological split if the goal is
# forecasting - confirm with the intended use of the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

# Scale numerical values and one-hot encode categorical values in one step.
# Columns not listed (e.g. Year, Month, Day, DayOfWeek) are passed through as-is.
# handle_unknown='ignore' keeps transform() from raising if a category appears
# only in the test rows (combining it with drop= requires scikit-learn >= 1.0).
column_transformer = ColumnTransformer(
    transformers=[
        ('scaler', StandardScaler(), numeric_features),
        ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough')

X_train = column_transformer.fit_transform(X_train_raw)
X_test = column_transformer.transform(X_test_raw)

# ColumnTransformer fits clones of its transformers; expose the fitted scaler
# under the name `scaler` for any later inspection.
scaler = column_transformer.named_transformers_['scaler']

In [None]:
# Fit an ordinary least-squares model on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
regressor = LinearRegression().fit(X_train, y_train)
regressor

In [None]:
# Generate predictions for the held-out test features
Y_pred = regressor.predict(X=X_test)

In [None]:
# Evaluate: coefficient of determination on the held-out set.
# r2 holds the score scaled by 100 (i.e. as a percentage), so the printed value
# carries a "%" sign and two decimals; without it the number can be misread as
# a raw R² value.
r2 = r2_score(y_test, Y_pred) * 100
print(f"R² Score: {r2:.2f}%")