In [7]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array

In [13]:
class StandardScaler(BaseEstimator, TransformerMixin):
    """Column-wise standardizer: optionally center, then divide by the std.

    Parameters
    ----------
    with_mean : bool, default=True
        When True, ``transform`` subtracts the per-feature mean learned in
        ``fit`` before dividing by ``scale_``.

    Attributes
    ----------
    mean_ : ndarray of shape (n_features,)
        Per-feature mean of the data passed to ``fit``.
    scale_ : ndarray of shape (n_features,)
        Per-feature standard deviation (``np.std`` with its default ``ddof``),
        with exact zeros replaced by 1.0 so constant features stay finite.
    statistics_ : ndarray of shape (n_features,)
        Independent copy of ``scale_``.
    n_features_in_ : int
        Number of columns seen during ``fit``.
    """

    def __init__(self, with_mean=True):
        # Only hyper-parameters are stored here. Learned attributes (trailing
        # underscore) are created in ``fit``; defining them up front, even as
        # None, makes ``check_is_fitted`` treat a fresh instance as fitted
        # because it only tests for attribute existence.
        self.with_mean = with_mean

    def fit(self, X, y=None):
        """Learn the per-feature mean and standard deviation of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data; validated with ``check_array``.
        y : ignored
            Accepted so the estimator can be used as a pipeline step.

        Returns
        -------
        self : StandardScaler
            The fitted instance.
        """
        X = check_array(X)
        self.n_features_in_ = X.shape[1]
        self.mean_ = np.mean(X, axis=0)
        scale = np.std(X, axis=0)
        # A constant column has zero spread; dividing by it would yield
        # NaN/inf, so such columns are left unscaled instead.
        scale[scale == 0.0] = 1.0
        self.scale_ = scale
        self.statistics_ = scale.copy()
        return self

    def transform(self, X):
        """Standardize ``X`` with the statistics learned in ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to scale; must have as many columns as the training data.

        Returns
        -------
        ndarray of shape (n_samples, n_features)
            A new array; the caller's ``X`` is never modified.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If the number of columns differs from the one seen in ``fit``.
        """
        check_is_fitted(self, ("mean_", "scale_"))
        X = check_array(X)
        if self.n_features_in_ != X.shape[1]:
            raise ValueError(f"X has {X.shape[1]} features, but StandardScaler is fitted with {self.n_features_in_} features.")
        if self.with_mean:
            # Out-of-place subtraction: ``check_array`` may hand back the
            # caller's own buffer, and ``-=`` would both overwrite it and
            # fail on integer input.
            X = X - self.mean_
        return X / self.scale_

In [14]:
import numpy as np

In [15]:
# Demo scaler plus a reproducible 100 x 3 matrix of uniform samples in [0, 1).
# A seeded Generator replaces the unseeded global RNG so that a fresh
# Restart & Run All produces the same numbers every time.
scaler = StandardScaler(with_mean=True)
X = np.random.default_rng(42).random((100, 3))

In [16]:
# Learn the per-column statistics from X; fit returns the scaler itself,
# which is what this cell displays.
scaler.fit(X)

In [18]:
# statistics_ is assigned in fit as a copy of scale_ (the per-column np.std of X).
scaler.statistics_

array([0.29054244, 0.29130704, 0.29619286])

In [11]:
# fit_transform is not defined on the class in this notebook; presumably it is
# inherited from TransformerMixin and runs fit followed by transform on X.
# NOTE(review): this displays the whole 100-row result; consider slicing for display.
scaler.fit_transform(X)

array([[-0.32251079, -1.60809411,  1.21987589],
       [ 1.53020565,  1.58719729, -0.48696665],
       [ 1.08000926, -0.78749556, -1.6952231 ],
       [-0.71842404, -1.11558547,  1.10035459],
       [ 0.81994071, -1.92360608, -1.23391596],
       [ 1.37136426, -1.77744729, -0.34586624],
       [ 1.09474791, -0.69849953,  1.70780909],
       [ 0.26432598,  0.85789561, -1.22833211],
       [ 1.17044085,  1.17258931, -0.32266253],
       [-0.39765676,  0.02986685, -1.02943608],
       [ 1.39455365,  1.3775483 ,  1.246827  ],
       [ 1.23556767, -1.10163607, -0.31664448],
       [ 1.53441492,  0.22561642, -0.39028606],
       [ 1.13684541,  1.28527511,  1.43870085],
       [-1.86664947,  0.9825212 ,  0.57406682],
       [ 0.06941887,  0.39421778,  0.45313125],
       [-0.81096372,  1.64304421, -1.41781565],
       [-0.29654591,  0.46535867,  0.13924721],
       [ 0.11557285,  0.54291519,  0.1552058 ],
       [ 1.28625381,  0.02743011,  1.10422549],
       [ 1.28125087, -0.2206009 , -0.373

In [21]:
## Pipelines

In [23]:
from sklearn.pipeline import Pipeline, make_pipeline

# Reproducible toy regression data: three uniform features and a uniform target.
# A seeded Generator replaces the unseeded global RNG so re-running the
# notebook from a fresh kernel gives the same fit and predictions.
rng = np.random.default_rng(42)
X = pd.DataFrame(rng.random((100, 3)), columns=['a', 'b', 'c'])
y = pd.Series(rng.random(100))

# Two-step pipeline: the custom scaler feeds a linear regression.
num_pipelines = Pipeline(steps=[
    ('scaler', StandardScaler(with_mean=True)),
    ('model', LinearRegression()),
])

# Last expression of the cell, so the fitted pipeline is displayed.
num_pipelines.fit(X, y)



In [30]:
# In-sample predictions: X is the same frame the pipeline was fitted on.
num_pipelines.predict(X)

array([0.5272088 , 0.49223156, 0.57215823, 0.5744223 , 0.50338077,
       0.58988446, 0.56451934, 0.51624476, 0.53992768, 0.54457014,
       0.47724167, 0.54548817, 0.54550898, 0.52967376, 0.54898708,
       0.53373139, 0.49505254, 0.57101588, 0.64130725, 0.63150491,
       0.52480825, 0.62053789, 0.6234586 , 0.52546395, 0.52462718,
       0.59584035, 0.56364373, 0.59241837, 0.52243549, 0.59995843,
       0.57080222, 0.50474971, 0.54424485, 0.52618067, 0.57688776,
       0.51591457, 0.61613908, 0.50952275, 0.5268066 , 0.45001547,
       0.54760337, 0.53583461, 0.54872304, 0.53005027, 0.47726659,
       0.55220187, 0.57808228, 0.54619373, 0.5175928 , 0.59207055,
       0.58608743, 0.46825323, 0.52156203, 0.50379661, 0.46992333,
       0.55613467, 0.60605015, 0.56826914, 0.51338566, 0.53095003,
       0.58136256, 0.52993208, 0.57436793, 0.51685768, 0.53114109,
       0.50392075, 0.59495859, 0.58482443, 0.47598922, 0.59771419,
       0.53125757, 0.56908613, 0.52969241, 0.51121185, 0.51437

In [None]:
# Display the pipeline object itself (its steps are 'scaler' and 'model').
num_pipelines

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error

In [None]:
# Bare reference to the imported metric: this shows the function object only;
# it is not called on any predictions here.
root_mean_squared_error