In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Simulated data: a noisy sine wave sampled on an evenly spaced grid over [0, 10].
np.random.seed(42)
x = np.linspace(0, 10, 100)
y = np.sin(x) * 10 + np.random.normal(0, 1, len(x))

# Split into train/test
# Shuffle the sample indices before splitting. Because x is sorted, slicing off
# the last 20 points would place every test sample beyond the largest training
# x, so the test RMSE would measure extrapolation outside the fitted range
# instead of how well each model generalizes within it.
split = 80  # number of training samples; the remaining points form the test set
shuffled_idx = np.random.permutation(len(x))  # drawn from the stream seeded above
train_idx, test_idx = shuffled_idx[:split], shuffled_idx[split:]
x_train, x_test = x[train_idx].reshape(-1, 1), x[test_idx].reshape(-1, 1)
y_train, y_test = y[train_idx], y[test_idx]

# Models with different complexities: two random forests that share the same
# number of trees and seed and differ only in max_depth.
models = {
    "Shallow Forest (max_depth=3)": RandomForestRegressor(n_estimators=50, max_depth=3, random_state=42),
    "Deep Forest (max_depth=15)": RandomForestRegressor(n_estimators=50, max_depth=15, random_state=42),
}

# Fit each forest on the training split, then report RMSE on both the data it
# was fitted on and the held-out data so the two errors can be compared.
for name, model in models.items():
    model.fit(x_train, y_train)
    train_rmse = np.sqrt(mean_squared_error(y_train, model.predict(x_train)))
    test_rmse = np.sqrt(mean_squared_error(y_test, model.predict(x_test)))
    # The separator was mojibake ("→" encoded as UTF-8 but decoded as cp1252).
    print(f"{name} → Train RMSE: {train_rmse:.2f}, Test RMSE: {test_rmse:.2f}")

# Plot for visualization: the raw train/test samples plus one prediction curve
# per fitted model, evaluated over the full x grid.
fig, ax = plt.subplots(figsize=(9, 5))
ax.scatter(x_train, y_train, color='blue', alpha=0.5, label='Training Data')
ax.scatter(x_test, y_test, color='gray', alpha=0.5, label='Testing Data')

grid = x.reshape(-1, 1)  # same (-1, 1) layout the models were fitted on
for label, forest in models.items():
    ax.plot(x, forest.predict(grid), label=label)

ax.set_title("Overfitting vs Generalization")
ax.set_xlabel("Feature (x)")
ax.set_ylabel("Target (y)")
ax.legend()
ax.grid(True)
plt.show()


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject