<a href="https://colab.research.google.com/github/bmreiniger/datascience.stackexchange/blob/master/88919-centering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_breast_cancer, load_boston
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import clone
import numpy as np

# The two preprocessing variants compared in the cells below:
# centering + scaling (the StandardScaler default) versus scaling only
# (with_mean=False, i.e. the features are not shifted to zero mean).
center_scale = StandardScaler()
just_scale = StandardScaler(with_mean=False)


# Regression

In [2]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older release -- confirm the pinned version.
X, y = load_boston(return_X_y=True)

lasso, svr = Lasso(), SVR(kernel='rbf')
models = [lasso, svr]

for model in models:
    # Fit a fresh clone of the estimator behind each scaler: scale-only first,
    # then center-and-scale (Pipeline.fit returns the fitted pipeline).
    pipe_sc = Pipeline(steps=[('scale', just_scale), ('model', clone(model))]).fit(X, y)
    pipe_ce = Pipeline(steps=[('scale', center_scale), ('model', clone(model))]).fit(X, y)
    est_sc, est_ce = pipe_sc['model'], pipe_ce['model']

    if not hasattr(est_sc, "coef_"):
        # No coefficients exposed by this estimator: compare the two pipelines'
        # predictions on the first five rows instead.
        head = X[:5, :]
        print(pipe_sc.predict(head), '\n', pipe_ce.predict(head))
        continue

    # Coefficients are exposed: check whether centering changed them, and
    # show how far the intercept moved between the two fits.
    print(np.allclose(est_sc.coef_, est_ce.coef_))
    print(est_sc.intercept_ - est_ce.intercept_)


True
-7.252244903373057
[27.02023361 23.42604706 27.38542185 26.78196365 26.53210595] 
 [29.32824737 23.06856722 31.22595594 28.98548025 29.33410188]


# Classification

In [3]:
X, y = load_breast_cancer(return_X_y=True)

# The 'saga' solver shuffles the data while fitting, so without a fixed seed
# the printed coefficients differ from run to run. Pin it so that
# Restart & Run All reproduces the same numbers.
SEED = 0

lr1sa = LogisticRegression(penalty='l1', solver='saga', max_iter=5000, random_state=SEED)
lr2sa = LogisticRegression(penalty='l2', solver='saga', max_iter=5000, random_state=SEED)
lr2lb = LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000)
svm = SVC(kernel='rbf')

models = [lr1sa, lr2sa, lr2lb, svm]

for model in models:
    # Blank lines to visually separate each model's output.
    print('\n')
    # Fit a fresh clone of the model behind each scaler: scale-only first,
    # then center-and-scale. clone() carries over the seeded random_state.
    pipe_sc = Pipeline(steps=[('scale', just_scale), ('model', clone(model))])
    pipe_sc.fit(X, y)
    pipe_ce = Pipeline(steps=[('scale', center_scale), ('model', clone(model))])
    pipe_ce.fit(X, y)
    if hasattr(pipe_sc['model'], "coef_"):
        # Coefficients are exposed: print both coefficient arrays for
        # comparison, then the intercept shift between the two fits.
        print(pipe_sc['model'].coef_, '\n', pipe_ce['model'].coef_)
        print(pipe_sc['model'].intercept_ - pipe_ce['model'].intercept_)
    else:
        # No coefficients exposed by this estimator: compare decision values
        # on the first five rows instead.
        print(pipe_sc.decision_function(X[:5,:]), '\n', pipe_ce.decision_function(X[:5,:]))




[[ 0.32381526  0.          0.          0.          0.          0.
  -1.20458806 -2.16261492  0.55825892  2.36189722 -1.86010002  0.25886237
   0.         -1.35445068 -0.04304869  0.65946115  0.          0.
   0.28310434  0.          0.         -1.33224914  0.         -1.25537231
  -0.46286982  0.         -0.48866728 -1.29922543 -1.0311878  -0.73390537]] 
 [[ 0.          0.          0.          0.          0.          0.
  -0.10422294 -1.0703742   0.          0.1468828  -2.69794569  0.37726895
   0.          0.         -0.32467961  0.89639965  0.          0.
   0.          0.22206571 -1.9769822  -1.77076084 -0.59361568 -1.80590488
  -0.54123424  0.         -1.12941999 -1.23635839 -0.55090744  0.        ]]
[1.53848966]


[[ 0.6480783  -0.12072329  0.49661551 -0.46098839  0.20655278 -0.4880405
  -1.24847151 -1.60065136  0.57014549  1.97221663 -1.35409498  0.27297413
  -0.62022198 -1.21597877 -0.33172009  0.79455495  0.27230935  0.00519954
   0.27649354  0.16703611 -0.37044233 -1.1219136

In [4]:
# Per-column means of X (the quantity that centering would subtract).
np.mean(X, axis=0)

array([1.41272917e+01, 1.92896485e+01, 9.19690334e+01, 6.54889104e+02,
       9.63602812e-02, 1.04340984e-01, 8.87993158e-02, 4.89191459e-02,
       1.81161863e-01, 6.27976098e-02, 4.05172056e-01, 1.21685343e+00,
       2.86605923e+00, 4.03370791e+01, 7.04097891e-03, 2.54781388e-02,
       3.18937163e-02, 1.17961371e-02, 2.05422988e-02, 3.79490387e-03,
       1.62691898e+01, 2.56772232e+01, 1.07261213e+02, 8.80583128e+02,
       1.32368594e-01, 2.54265044e-01, 2.72188483e-01, 1.14606223e-01,
       2.90075571e-01, 8.39458172e-02])