In [13]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def load_and_prepare_data(file_path):
    data = pd.read_csv(file_path, parse_dates=['Date'], index_col='Date')
    data_daily = data.resample('D').mean().fillna(method='ffill')
    return data_daily


In [14]:
def feature_engineering(data):
    # Add Moving Average (SMA) as a feature for the model
    data['SMA_4'] = data['Close'].rolling(window=4).mean().fillna(method='bfill') 
    features = ['Open', 'High', 'Low', 'Volume', 'SMA_4']
    X = data[features]
    y = data['Close']
    return X, y


In [15]:
def train_random_forest(X_train, y_train):
    # Random Forest Regressor initialization
    rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_regressor.fit(X_train, y_train)
    return rf_regressor

In [16]:
def run_random_forest():
    file_path = './Data/AAPL.csv'
    data_daily = load_and_prepare_data(file_path)
    X, y = feature_engineering(data_daily)
    
    # Splitting the dataset into train and test sets, 80%, 20%
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    rf_regressor = train_random_forest(X_train, y_train)
    # Predict
    y_pred = rf_regressor.predict(X_test)
    # Evaluate
    mse_rf = mean_squared_error(y_test, y_pred)
    r2_score_rf = r2_score(y_test, y_pred)
    return mse_rf, r2_score_rf


In [17]:
mse_rf, r2_score_rf = run_random_forest()

print(f"Mean Squared Error (Random Forest): {mse_rf:.4f}")
print(f"R² Score (Random Forest): {r2_score_rf:.4f}")

Mean Squared Error (Random Forest): 0.0590
R² Score (Random Forest): 0.9999
