# In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import (
    RandomizedSearchCV,
    TimeSeriesSplit,
    train_test_split,
)

# Load stock data (example: CSV with columns 'Date', 'Open', 'High', 'Low', 'Close', 'Volume')
# NOTE(review): the lag/rolling features below assume the rows are already in
# chronological (oldest-first) order -- confirm for this CSV.
data = pd.read_csv('/content/ABNB_stock_data.csv')

# Feature engineering.
# The target is the same-row 'Close', so every feature must be built strictly
# from earlier rows; anything that touches the current row leaks the answer.
data['Close_Lag_1'] = data['Close'].shift(1)
data['Close_Lag_2'] = data['Close'].shift(2)
# Shift before rolling so the 5-row window ends at the previous row and does
# not contain the value being predicted.
data['Moving_Avg_5'] = data['Close'].shift(1).rolling(window=5).mean()
# The current row's volume is not known before that row's close, so use the
# previous row's volume instead.
data['Volume_Lag_1'] = data['Volume'].shift(1)

feature_columns = ['Close_Lag_1', 'Close_Lag_2', 'Moving_Avg_5', 'Volume_Lag_1']
target_column = 'Close'

# Drop rows with missing values, looking only at the columns the model uses
# (the first rows are NaN by construction of the lag/rolling features).
data.dropna(subset=feature_columns + [target_column], inplace=True)

# Features (X) and target (y)
X = data[feature_columns]
y = data[target_column]

# Train-test split: shuffle=False keeps the last 35% of rows as the hold-out,
# so the model is always evaluated on rows that come after its training rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, shuffle=False)

# Define parameter grid for RandomizedSearchCV
param_distributions = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 'auto' was removed in scikit-learn 1.3; for a regressor it meant
    # "use all features", which is spelled 1.0.
    'max_features': [1.0, 'sqrt', 'log2']
}

# Random Forest model
rf_model = RandomForestRegressor(random_state=42)

# RandomizedSearchCV
# TimeSeriesSplit validates each fold on rows that follow its training rows,
# matching the chronological hold-out above; a plain integer cv would also
# train on later rows and validate on earlier ones.
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_distributions,
    n_iter=50,
    cv=TimeSeriesSplit(n_splits=3),
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Fit RandomizedSearchCV
random_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", random_search.best_params_)

# Evaluate with the best model (refit on the full training split by the search)
best_rf_model = random_search.best_estimator_
y_pred = best_rf_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Optimized Random Forest MAE: {mae:.4f}')
print(f'Optimized Random Forest MSE: {mse:.4f}')
print(f'Optimized Random Forest R²: {r2:.4f}')
