In [1]:
from mlgrad.risk import ERisk, ERiskGB
from mlgrad.loss import SquareErrorLoss, ErrorLoss
from mlgrad.model import LinearFuncModel, SigmaNeuronModel, FFNetworkModel, \
                        FFNetworkFuncModel, SigmaNeuronModelLayer, LinearModel
from mlgrad.func import SoftPlus, Sqrt, Sigmoidal, HingeSqrt, Arctang, Absolute

from mlgrad import erm_fg, erm_irgd, fg, erm_fg, erisk
from mlgrad.regr import m_regression_irls
from mlgrad.af import averaging_function
from mlgrad.weights import MWeights

import numpy as np

In [2]:
import sklearn.datasets as datasets
import sklearn.metrics as metrics
import sklearn.preprocessing as preprocessing
import matplotlib.pyplot as plt

# Print the interpreter version so the environment used for this run is recorded
# in the notebook output.
import sys
print(sys.version)

3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0]


In [3]:
# X, Y = datasets.load_boston(return_X_y=True)
# N = len(X)
# print(X.shape, X.dtype)
# print(Y.shape, Y.dtype)
# # print(Y)

In [4]:
# Load the California housing dataset through scikit-learn's fetcher; the
# returned object exposes the `.data` and `.target` attributes read in the next cell.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [5]:
# Pull the feature matrix (X) and the regression target (Y) out of the dataset object.
X, Y = housing.data, housing.target

In [6]:
# Sanity check: shapes on the first line, dtypes on the second (target first, then features).
print(*(arr.shape for arr in (Y, X)))
print(*(arr.dtype for arr in (Y, X)))

(20640,) (20640, 8)
float64 float64


In [7]:
# Scale the features with sklearn's `preprocessing.scale`; the array is made
# C-contiguous first via `np.ascontiguousarray`. The trailing comment keeps the
# robust-scaling alternative at hand.
X_o = preprocessing.scale(np.ascontiguousarray(X)) # preprocessing.robust_scale(X)
# Work on a copy of the targets so the loaded `Y` is not shared with later stages.
Y_o = Y.copy()

In [8]:
from mlgrad.boost.gb_regression import gb_fit, gb_fit_agg

In [9]:
def new_model(N):
    """Factory for one weak learner: a sigmoidal neuron with initialised parameters.

    `N` is forwarded unchanged as the second argument of `SigmaNeuronModel`
    (presumably the number of input features — confirm against mlgrad).
    The activation is `Sigmoidal(1.0)`. Returns the initialised model.
    """
    neuron = SigmaNeuronModel(Sigmoidal(1.0), N)
    neuron.init_param()
    return neuron

# `m` is passed as `n_iter` to both boosting fits and is also embedded in the
# figure file names written by later cells.
m = 20

# Baseline fit: `gb_fit` builds its weak learners through the `new_model` factory.
# NOTE(review): the saved output of this cell is "ZeroDivisionError: float division";
# the cause is not visible from the notebook — TODO investigate inside gb_fit
# before trusting any downstream cell (none of them has been executed).
gb = gb_fit(X_o, Y_o, new_model, h=0.01, tol=1.0e-6, n_iter=m)
# Keep the fitted composite model and the recorded `lvals` sequence for later plots.
lfm, lvals = gb.complex_model, gb.lvals
# print(np.asarray(lfm.weights))

ZeroDivisionError: float division

In [None]:
# `alpha` is forwarded to `gb_fit_agg` and also appears (as a percentage) in the
# output file names of the plotting cells.
alpha=0.96
# NOTE(review): the result is unpacked into two names here, whereas the `gb_fit`
# result above is used as a single object with `.complex_model` / `.lvals` —
# confirm what `gb_fit_agg` actually returns. The `lvals_agg` bound on this
# line is overwritten by the assignment on the next line either way.
gb_agg, lvals_agg = gb_fit_agg(X_o, Y_o, new_model, alpha=alpha, h=0.01, tol=1.0e-6, n_iter=m)
lfm_agg, lvals_agg = gb_agg.complex_model, gb_agg.lvals

# print(np.asarray(lfm_agg.weights))

In [None]:
# Show the `weights` of both fitted composite models: the `gb_fit` result first,
# the `gb_fit_agg` result second.
print(np.asarray(lfm.weights))
print(np.asarray(lfm_agg.weights))

In [None]:
# Compare the `lvals` sequences recorded by the two fits (presumably per-iteration
# loss values — confirm against mlgrad) on a logarithmic scale.
plt.plot(np.log(lvals), label='ls')
plt.plot(np.log(lvals_agg), label='wm')
plt.legend()
# The file name encodes alpha (as a percentage) and m. Each element of the tuple
# needs its own conversion specifier; the previous literal "_m_" left `m`
# unconsumed, which makes the `%` operator raise TypeError.
plt.savefig('calhousing_%.0f_%s_lvals.eps' % (alpha*100, m))
plt.show()

In [None]:
# Sorted absolute residuals of both models on log-log axes, one curve per model.
plt.figure(figsize=(7, 5))
for _fitted, _tag in ((lfm, 'ls'), (lfm_agg, 'wm')):
    _abs_resid = np.abs(_fitted.evaluate_all(X_o) - Y_o)
    plt.loglog(sorted(_abs_resid), label=_tag)
plt.legend()
# File name carries alpha (as a percentage) and m.
plt.savefig('calhousing_%.0f_%s_errors.eps' % (alpha*100, m))
plt.show()

In [None]:
# Scatter the signed residuals of the two models against each other
# (x: `lfm`, y: `lfm_agg`), one black dot per sample.
plt.figure(figsize=(7, 6))
plt.scatter(
    lfm.evaluate_all(X_o) - Y_o,
    lfm_agg.evaluate_all(X_o) - Y_o,
    c='k',
    s=9,
)
plt.savefig('calhousing_%.0f_errors_box.eps' % (alpha*100,))
plt.show()

In [None]:
# Evaluate each composite model sample by sample (the models are called on one
# row of X_o at a time) and collect the predictions into arrays.
Y_ls = np.array(list(map(lfm, X_o)))
Y_agg = np.array(list(map(lfm_agg, X_o)))

In [None]:
# Per-sample absolute errors of the two models.
err_ls, err_wm = np.abs(Y_o - Y_ls), np.abs(Y_o - Y_agg)

# Centre: mean absolute error of each model.
acc_ls, acc_wm = (metrics.mean_absolute_error(Y_o, pred) for pred in (Y_ls, Y_agg))
# Spread: mean absolute deviation of the errors around that centre
# (despite the `var_` prefix this is not a variance).
var_ls, var_wm = (
    np.mean(np.abs(err - centre))
    for err, centre in ((err_ls, acc_ls), (err_wm, acc_wm))
)

print(acc_ls, acc_wm)
print(var_ls, var_wm)

In [None]:
# Same summary as the mean-based one, but centred on the median absolute error.
err_ls, err_wm = np.abs(Y_o - Y_ls), np.abs(Y_o - Y_agg)
# Centre: median of the per-sample absolute errors.
acc_ls, acc_wm = (np.median(err) for err in (err_ls, err_wm))
# Spread: mean absolute deviation of the errors around that median.
var_ls, var_wm = (
    np.mean(np.abs(err - centre))
    for err, centre in ((err_ls, acc_ls), (err_wm, acc_wm))
)

print(acc_ls, acc_wm)
print(var_ls, var_wm)

In [None]:
# Predicted vs. true targets for both models, side by side; points are shaded by
# their absolute error using the reversed grey colormap.
plt.figure(figsize=(12,5))
# Left panel: predictions stored in Y_ls.
plt.subplot(1,2,1)
plt.scatter(Y_o, Y_ls, c=np.abs(Y_o-Y_ls), s=9, cmap=plt.cm.Greys_r)
plt.plot([0,5], [0,5], color='k')  # diagonal y = x, i.e. a perfect prediction
# Right panel: predictions stored in Y_agg.
plt.subplot(1,2,2)
plt.scatter(Y_o, Y_agg, c=np.abs(Y_o-Y_agg), s=9, cmap=plt.cm.Greys_r)
plt.plot([0,5], [0,5], color='k')  # diagonal y = x, i.e. a perfect prediction
# (A second, redundant plt.subplot(1,2,2) call that only re-selected the
# already-current right panel was removed here.)
plt.tight_layout()
plt.show()