In [1]:
import sys, os
sys.path.append(os.path.dirname(os.getcwd()))
import pickle 
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.linalg import expm
from scipy.stats import wishart
from joblib import Parallel, delayed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from pyfrechet.metric_spaces import MetricData, LogCholesky, spd_to_log_chol, log_chol_to_spd
from pyfrechet.regression.bagged_regressor import BaggedRegressor
from pyfrechet.regression.trees import Tree

INFO: Using numpy backend


In [42]:
# Root folder of the SPD simulations; the pickled samples are read from <path>/data.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
path='C:/Users/J2404/OneDrive/Documentos/GitHub/pyfrechet/simulations_SPD'
# os.listdir returns entries in arbitrary order, so sort the listing before
# indexing: the third file is then the same on every machine and every run.
file=sorted(os.listdir(os.path.join(path, 'data')))[2]
# Log-Cholesky metric space (dim=2) in which the responses are represented.
M=LogCholesky(dim=2)

In [43]:
# Load the selected simulated sample from <path>/data.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(os.path.join(path, 'data', file), 'rb') as f:
    sample = pickle.load(f)
# sample['sample'][0] holds the predictors, sample['sample'][1] the matrix responses.
X=np.c_[sample['sample'][0]]
# Map every response matrix to its log-Cholesky representation and stack them row-wise.
sampleY_LogChol=np.c_[[spd_to_log_chol(A) for A in sample['sample'][1]]]
y=MetricData(M, sampleY_LogChol)

# Train/test partition and scaling data
# A fixed seed makes the partition (and every number derived from it)
# reproducible under Restart Kernel -> Run All.
SPLIT_SEED=0
train_idx, test_idx=train_test_split(np.arange(len(X)), test_size=100, random_state=SPLIT_SEED)
X_train=X[train_idx]
X_test=X[test_idx]
y_train=y[train_idx]
y_test=y[test_idx]
# Fit the scaler on the training predictors only, then apply it to the test
# predictors, so no test information leaks into the scaling.
scaler=MinMaxScaler(feature_range=(0,1))
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [44]:
# Base learner shared by every member of the ensemble.
base = Tree(
    split_type='2means',
    impurity_method='cart',
    mtry=None,  # It is a regression curve setting, only one predictor
    min_split_size=1,
)

# Bagged ensemble of 100 trees, each fitted on a bootstrap resample drawn with
# replacement (bootstrap_fraction=1).
# n_jobs=-1: presumably "use all available cores" -- TODO confirm against BaggedRegressor.
forest = BaggedRegressor(
    estimator=base,
    n_estimators=100,
    bootstrap_fraction=1,
    bootstrap_replace=True,
    n_jobs=-1,
)
forest.fit(X_train, y_train)

# Collect everything needed downstream: the split indices, the observed
# responses, the fitted/predicted responses and the out-of-bag errors.
results = dict(
    train_indices=train_idx,
    y_train_data=y_train.data,
    train_predictions=forest.predict(X_train).data,
    y_test_data=y_test.data,
    test_predictions=forest.predict(X_test).data,
    oob_errors=forest.oob_errors(),
)

In [45]:
# Nominal miscoverage level.
alpha = 0.05
# Threshold: the 100*(1 - alpha)-th percentile of the out-of-bag errors.
# NOTE(review): confirm that oob_errors() and M.d are on the same scale
# (distance vs. squared distance) before interpreting the coverage.
Dalpha = np.percentile(a=results['oob_errors'], q=100 * (1 - alpha))
# Fraction of test points whose prediction lies within Dalpha of the observed
# response (displayed as the cell output).
np.mean(
    M.d(results['test_predictions'], results['y_test_data']) <= Dalpha
)

0.93

In [65]:
# Three symmetric 2x2 matrices.
Sigma_1 = np.array([[1, -0.6], [-0.6, 0.5]])
Sigma_2 = np.array([[1, 0], [0, 1]])
Sigma_3 = np.array([[0.5, 0.4], [0.4, 1]])

# Eigenvalues of Sigma_1 (displayed as the cell output).
np.linalg.eigvals(Sigma_1)

array([1.4, 0.1])