# Baseline Models
**Market Intelligence ML - Project 1**

Train baseline models and record their benchmark metrics (MSE, R²).

In [None]:
# Notebook setup: extend the import path, then load third-party and project modules.
import sys
# Add the parent directory to the module search path so the `src.` imports
# below can resolve (assumes the notebook is run from a subdirectory of the
# project root — TODO confirm). This must run before those imports.
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

# Project-local baseline model wrappers and the data-splitting helper.
from src.models.baseline_models import LinearRegressionModel, RandomForestModel
from src.data.preprocess import split_train_val_test

# Only reached if every import above succeeded.
print('✅ Baseline models ready!')

In [None]:
# Train the Linear Regression baseline and report its metrics on the test split.
print("Training Linear Regression...")
lr_model = LinearRegressionModel()

# Example: Using dummy data (replace with actual features from notebook 02).
# A locally seeded generator keeps the placeholder data -- and therefore the
# metrics printed below -- identical from run to run, without touching the
# global NumPy random state that other cells may rely on.
rng = np.random.default_rng(42)
X_example = pd.DataFrame(
    rng.standard_normal((1000, 10)),
    columns=[f'feat_{i}' for i in range(10)],
)
y_example = pd.Series(rng.standard_normal(1000))

# Sequential 70% / 15% / 15% split by row position (no shuffling).
train_idx = int(0.7 * len(X_example))
val_idx = int(0.85 * len(X_example))

# `.iloc` makes the positional slicing explicit, so the split does not depend
# on what the index labels happen to be once real data replaces the dummy frame.
X_train, y_train = X_example.iloc[:train_idx], y_example.iloc[:train_idx]
# The validation split is defined here but not used within this cell.
X_val, y_val = X_example.iloc[train_idx:val_idx], y_example.iloc[train_idx:val_idx]
X_test, y_test = X_example.iloc[val_idx:], y_example.iloc[val_idx:]

# Train
lr_model.fit(X_train, y_train)

# Predict on the held-out test rows
y_pred_lr = lr_model.predict(X_test)

# Metrics
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("\nLinear Regression Results:")
print(f"  MSE: {mse_lr:.4f}")
print(f"  R²: {r2_lr:.4f}")

# Feature importance
# NOTE(review): presumably get_feature_importance() returns a pandas object
# sorted by importance, so .head() shows the top five -- confirm in
# src.models.baseline_models.
print("\nTop 5 Features:")
print(lr_model.get_feature_importance().head())