In [None]:
from dataset import load_openml

# Load the 'kin8nm' dataset through the project helper and report the
# shapes of the feature matrix and the target vector, one per line.
X, y = load_openml('kin8nm')
print(X.shape, y.shape, sep='\n')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Hold out 10% of the rows as the test set; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.1
)

# Fit the min-max scaler on the training features only, then apply the
# fitted scaler to both splits so no test-set statistics leak into it.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Targets are not scaled: y_train / y_test keep the values returned by
# train_test_split.

In [None]:
from Mondrian_forest import evaluate_all_lifetimes

In [None]:
# Settings shared by every train / evaluate_all_lifetimes call in this notebook.
lifetime_max = 3   # forwarded as the `lifetime_max` argument
delta = 0          # forwarded as the `delta` argument
M = 10             # number of Mondrian trees to use

In [None]:
# `results` collects one evaluate_all_lifetimes output per feature
# representation; later cells index it by position. Entry 0 is the run on
# the untransformed (min-max scaled) features.
results = []
results.append(
    evaluate_all_lifetimes(X_train, y_train, X_test, y_test, M, lifetime_max, delta)
)

In [None]:
import matplotlib.pyplot as plt

# Plot the baseline run: its 'mse' series against its 'times' series.
baseline_curve = results[0][0]
plt.plot(baseline_curve['times'], baseline_curve['mse'], label='no transformation')
plt.legend()

In [None]:
# Disabled experiment: the entire cell body is one bare string literal, so
# none of the statements inside it are executed.
# NOTE(review): if this is ever re-enabled as written, `results[1] = ...`
# raises IndexError while `results` only holds the baseline entry; it would
# need results.append(...) instead.
'''
from others.sir import SlicedInverseRegression
SIR_model = SlicedInverseRegression(n_directions=None)
H = SIR_model.fit(X_train, y_train)
from Mondrian_forest import transform_data, two_one_norm
X_train_transformed, X_test_transformed = transform_data(H/two_one_norm(H), X_train, X_test)
results[1] = \
    evaluate_all_lifetimes(X_train_transformed[:, :5], 
                           y_train, 
                           X_test_transformed[:, :5], y_test, M, lifetime_max, delta)
plt.plot(results[0][0]['times'], results[0][0]['mse'], label = 'no transformation')
plt.plot(results[1][0]['times'], results[1][0]['mse'], label = 'transformation')
plt.legend()
'''

In [None]:
from Mondrian_forest import train, transform_data, two_one_norm
from estimate_H import estimate_H_finite_diff

# Train on the scaled features, then estimate H_1 from the trained model
# by finite differences (step_size=0.1).
history, w_trees = train(X_train, y_train, M, lifetime_max, delta)
H_1 = estimate_H_finite_diff(
    X_train, y_train, M, history, w_trees, step_size=0.1
)

# Normalize H_1 by its two_one_norm before using it to transform both splits.
H_1_normalized = H_1 / two_one_norm(H_1)
X_train_transformed, X_test_transformed = transform_data(
    H_1_normalized, X_train, X_test
)

In [None]:
import numpy as np

# Eigenvalues of the normalized H_1, sorted in descending order and plotted
# against their rank.
H_1_spectrum = np.linalg.eig(H_1 / two_one_norm(H_1))[0]
eigvals = np.sort(H_1_spectrum)[::-1]
plt.plot(eigvals)

In [None]:
# Evaluate on the H_1-transformed features.
# NOTE: this reads X_train_transformed / X_test_transformed, which a later
# cell overwrites with the second-stage features; re-run the H_1 cell first
# if this cell is executed out of order.
first_stage_run = evaluate_all_lifetimes(
    X_train_transformed, y_train, X_test_transformed, y_test, M, lifetime_max, delta
)

# Later cells look this run up as results[1]. Writing to a fixed slot keeps
# that index stable when the cell is re-run; a plain append would add a
# duplicate entry and shift every later index.
if len(results) > 1:
    results[1] = first_stage_run
else:
    results.append(first_stage_run)

for idx, label in enumerate(('no transformation', 'transformation')):
    plt.plot(results[idx][0]['times'], results[idx][0]['mse'], label=label)
plt.legend()

In [None]:
import numpy as np

# Rebuild the first-stage features from the untouched X_train / X_test rather
# than reading X_train_transformed. This cell overwrites that name below, so
# reading it here would make a re-run stack H_2 on top of already
# twice-transformed data.
# NOTE(review): assumes transform_data is deterministic, so this reproduces
# the output of the H_1 cell exactly — confirm.
H_1_scale = two_one_norm(H_1)
X_train_stage1, X_test_stage1 = transform_data(H_1 / H_1_scale, X_train, X_test)

# Second round: train on the first-stage features and estimate H_2 there.
history, w_trees = train(X_train_stage1, y_train, M, lifetime_max, delta)
H_2 = estimate_H_finite_diff(
    X_train_stage1, y_train, M, history, w_trees, step_size=0.1
)

# Rescale H_2 by ||H_1|| / ||H_2 H_1||. Presumably this makes the composed
# two-stage map equal H_2 H_1 normalized by its own two_one_norm (assumes
# transform_data applies its first argument linearly — confirm).
H_2_rescaled = H_2 * H_1_scale / two_one_norm(np.matmul(H_2, H_1))
X_train_transformed, X_test_transformed = transform_data(
    H_2_rescaled, X_train_stage1, X_test_stage1
)


In [None]:
# Evaluate on the second-stage features (X_train_transformed as rewritten by
# the preceding cell).
second_stage_run = evaluate_all_lifetimes(
    X_train_transformed, y_train, X_test_transformed, y_test, M, lifetime_max, delta
)

# Later cells look this run up as results[2]. Writing to a fixed slot keeps
# that index stable when the cell is re-run; a plain append would add a
# duplicate entry and shift every later index.
if len(results) > 2:
    results[2] = second_stage_run
else:
    results.append(second_stage_run)

curve_labels = ('no transformation', 'transformation', 'transformation-2nd')
for idx, label in enumerate(curve_labels):
    plt.plot(results[idx][0]['times'], results[idx][0]['mse'], label=label)
plt.legend()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Reference model: a small random forest (10 trees, depth <= 5, fixed seed)
# trained on the untransformed features and scored by test-set MSE.
rf = RandomForestRegressor(
    max_depth=5, n_estimators=10, random_state=42
).fit(X_train, y_train)
y_pred = rf.predict(X_test)
rf_mse = mean_squared_error(y_test, y_pred)
rf_mse

In [None]:
# Same random-forest configuration as the reference model, but fitted and
# scored on the transformed features; the test MSE is kept in `err`.
rf = RandomForestRegressor(
    max_depth=5, n_estimators=10, random_state=42
).fit(X_train_transformed, y_train)
y_pred = rf.predict(X_test_transformed)
err = mean_squared_error(y_test, y_pred)
err

In [None]:
# Eigenvalues of the product H_2 H_1 normalized by its two_one_norm, sorted in
# descending order and plotted against their rank.
# NOTE(review): a matrix product is generally not symmetric, so np.linalg.eig
# may return complex eigenvalues here — confirm this is acceptable for the plot.
H_21 = np.matmul(H_2, H_1)
H_21_spectrum = np.linalg.eig(H_21 / two_one_norm(H_21))[0]
eigvals = np.sort(H_21_spectrum)[::-1]
plt.plot(eigvals)

In [None]:
# The three Mondrian-forest curves, with the random-forest test MSE drawn as
# a horizontal reference line.
curve_labels = ('no transformation', 'transformation', 'transformation-2nd')
for idx, label in enumerate(curve_labels):
    curve = results[idx][0]
    plt.plot(curve['times'], curve['mse'], label=label)
plt.axhline(y=rf_mse, color='r', linestyle='-', label='random forest')
plt.legend()

In [None]:
# The three Mondrian-forest curves, with both random-forest test MSEs drawn
# as horizontal reference lines.
curve_labels = ('no transformation', 'transformation', 'transformation-2nd')
for idx, label in enumerate(curve_labels):
    curve = results[idx][0]
    plt.plot(curve['times'], curve['mse'], label=label)

# The two references used to share color='r' and linestyle='-', which made
# them indistinguishable in the figure and legend. The transformed-feature
# forest is now dashed.
# `err` is expected to hold the MSE of the forest fitted on
# X_train_transformed, so run this cell before `err` is reassigned further down.
plt.axhline(y=rf_mse, color='r', linestyle='-', label='random forest')
plt.axhline(y=err, color='r', linestyle='--', label='random forest with transformation')
plt.legend()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from others.sir import SlicedInverseRegression

# Sliced Inverse Regression followed by the same small random forest, fitted
# as one pipeline on the untransformed features. The test MSE overwrites `err`.
forest = Pipeline(steps=[
    ('sir', SlicedInverseRegression(n_directions=None)),
    ('rf', RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42)),
])
forest.fit(X_train, y_train)
y_pred = forest.predict(X_test)
err = mean_squared_error(y_test, y_pred)
err

In [None]:
from others.save import SlicedAverageVarianceEstimation

# Sliced Average Variance Estimation followed by the same small random
# forest, fitted as one pipeline. The test MSE overwrites `err`.
forest = Pipeline(steps=[
    ('save', SlicedAverageVarianceEstimation(n_directions=None)),
    ('rf', RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42)),
])
forest.fit(X_train, y_train)
y_pred = forest.predict(X_test)
err = mean_squared_error(y_test, y_pred)
err

In [None]:
# Project both splits with a fitted Sliced Inverse Regression model.
sir = SlicedInverseRegression(n_directions=None)
sir.fit(X_train, y_train)
X_train_sir = sir.transform(X_train)
X_test_sir = sir.transform(X_test)

# Only the first 5 SIR columns are passed to the Mondrian forest.
sir_run = evaluate_all_lifetimes(
    X_train_sir[:, :5], y_train, X_test_sir[:, :5], y_test, M, lifetime_max, delta
)

# Later cells look this run up as results[3]. Writing to a fixed slot keeps
# that index stable when the cell is re-run; a plain append would add a
# duplicate entry and shift every later index.
if len(results) > 3:
    results[3] = sir_run
else:
    results.append(sir_run)

curve_labels = (
    'no transformation',
    'transformation',
    'transformation-2nd',
    'transformation-sir',
)
for idx, label in enumerate(curve_labels):
    plt.plot(results[idx][0]['times'], results[idx][0]['mse'], label=label)
plt.legend()

In [None]:
# Show the shape of the full SIR-transformed training matrix (the Mondrian
# evaluation above used only its first 5 columns).
X_train_sir.shape

In [None]:
# Project both splits with a fitted Sliced Average Variance Estimation model.
save = SlicedAverageVarianceEstimation(n_directions=None)
save.fit(X_train, y_train)
X_train_save = save.transform(X_train)
X_test_save = save.transform(X_test)

# All SAVE columns are passed to the Mondrian forest.
save_run = evaluate_all_lifetimes(
    X_train_save, y_train, X_test_save, y_test, M, lifetime_max, delta
)

# This run is plotted below as results[4]. Writing to a fixed slot keeps
# that index stable when the cell is re-run; a plain append would add a
# duplicate entry.
if len(results) > 4:
    results[4] = save_run
else:
    results.append(save_run)

curve_labels = (
    'no transformation',
    'transformation',
    'transformation-2nd',
    'transformation-sir',
    'transformation-save',
)
for idx, label in enumerate(curve_labels):
    plt.plot(results[idx][0]['times'], results[idx][0]['mse'], label=label)

# `err` is expected to still hold the test MSE of the SAVE + random-forest
# pipeline from the earlier cell.
plt.axhline(y=err, color='r', linestyle='-', label='random forest with save')
plt.legend()