In [1]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from dataset import load_abalone

# Load a sample dataset
X, y = load_abalone()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
# scale data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

class RotationForestRegressor:
    def __init__(self, n_estimators=10, n_features_per_subset=3):
        self.n_estimators = n_estimators
        self.n_features_per_subset = n_features_per_subset
        self.estimators_ = []
        self.pca_components_ = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.features_indices_ = [np.random.choice(range(n_features), 
                                  self.n_features_per_subset, replace=False)
                                  for _ in range(self.n_estimators)]

        for feature_indices in self.features_indices_:
            X_subset = X[:, feature_indices]
            pca = PCA()
            X_subset_transformed = pca.fit_transform(X_subset)
            self.pca_components_.append(pca.components_)

            estimator = RandomForestRegressor(random_state=np.random.RandomState())
            estimator.fit(X_subset_transformed, y)
            self.estimators_.append(estimator)

    def predict(self, X):
        predictions = np.zeros((X.shape[0], self.n_estimators))

        for idx, (estimator, feature_indices, components) in enumerate(
                zip(self.estimators_, self.features_indices_, self.pca_components_)):
            X_subset = X[:, feature_indices]
            X_subset_transformed = np.dot(X_subset, components.T)
            predictions[:, idx] = estimator.predict(X_subset_transformed)

        return np.mean(predictions, axis=1)

# Create and fit the Rotation Forest
rot_forest = RotationForestRegressor()
rot_forest.fit(X_train, y_train)

# Make predictions
predictions = rot_forest.predict(X_test)


In [2]:
from sklearn.metrics import mean_squared_error

# Assuming you have y_test (actual values) and predictions from the model
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)


Mean Squared Error: 27.30648678560141
