In [1]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from sklearn.ensemble import RandomForestRegressor as sklearn_rf
from sklearn.ensemble import GradientBoostingRegressor as sklearn_gb
from sklearn.linear_model import LinearRegression as sklearn_lr

In [2]:
pip install tulia



In [3]:
from src.ensemble import RandomForestRegressor, GradientBoostingRegressor
from src.linear import LinearRegression

In [4]:
# Generate a synthetic regression problem: 100 samples with 10 features.
# The fixed random_state makes the dataset identical on every run.
X, y = make_regression(n_samples=100, n_features=10, random_state=1)

# Hold out 30% of the samples for testing; the split is seeded as well.
# train_test_split is already imported in the first cell, so it is not
# re-imported here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

### Random Forest

In [5]:
# Fit Tulia's random forest on the training split and score it on the test split.
# NOTE(review): no seed is passed to the Tulia model; if its constructor
# accepts one, set it so the reported error is reproducible — TODO confirm.
tulia = RandomForestRegressor(n_trees=10, max_depth=5, min_samples_split=4, max_features=0.5)
tulia.fit(X_train, y_train)

tulia_pred = tulia.predict(X_test)
tulia_mse = mean_squared_error(y_test, tulia_pred)

# Fit scikit-learn's random forest with the same tree count, depth, split
# and feature-fraction settings. random_state pins the bootstrap sampling
# and feature sub-sampling so the baseline error is the same on every run.
scikit = sklearn_rf(n_estimators=10, max_depth=5, min_samples_split=4, max_features=0.5, random_state=1)
scikit.fit(X_train, y_train)

scikit_pred = scikit.predict(X_test)
scikit_mse = mean_squared_error(y_test, scikit_pred)

In [6]:
# Report the test-set mean squared error of both random forest models.
print("Tulia error: {}".format(tulia_mse))
print("Scikit-learn error {}".format(scikit_mse))

Tulia error: 11337.007226626818
Scikit-learn error 13559.726445039438


### Gradient Boosting

In [7]:
# Train Tulia
tulia = GradientBoostingRegressor(learning_rate=1e-3, n_steps=100, max_depth=5, min_samples_split=4, max_features=0.5)
tulia.fit(X_train, y_train)

tulia_pred = tulia.predict(X_test)
tulia_mse = mean_squared_error(y_test, tulia_pred)

# Train scikit-learn
scikit = sklearn_gb(learning_rate=1e-3, n_estimators=10, max_depth=5, min_samples_split=4, max_features=0.5)
scikit.fit(X_train, y_train)

scikit_pred = scikit.predict(X_test)
scikit_mse = mean_squared_error(y_test, scikit_pred)

In [8]:
# Report the test-set mean squared error of both gradient boosting models.
print("Tulia error: {}".format(tulia_mse))
print("Scikit-learn error {}".format(scikit_mse))

Tulia error: 24907.95066430781
Scikit-learn error 22840.083891289953


### Linear Regression

In [9]:
# Train Tulia
tulia = LinearRegression(learning_rate=1e-1, n_steps=1000)
tulia.fit(X_train, y_train)

tulia_pred = tulia.predict(X_test)
tulia_mse = mean_squared_error(y_test, tulia_pred)

# Train scikit-learn
scikit = sklearn_lr()  # scikit-learn uses different approach in training linear models.
scikit.fit(X_train, y_train)

scikit_pred = scikit.predict(X_test)
scikit_mse = mean_squared_error(y_test, scikit_pred)

In [10]:
# Report the test-set mean squared error of both linear regression models.
print("Tulia error: {}".format(tulia_mse))
print("Scikit-learn error {}".format(scikit_mse))

Tulia error: 0.000289187391120129
Scikit-learn error 2.569793284847968e-26
