In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_selection import SelectKBest
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes

In [2]:
X,y = load_diabetes(return_X_y=True, as_frame=True)

In [3]:
X.shape

(442, 10)

In [4]:
t1 = np.square
t2 = np.tan
t3 = lambda x: np.log(
    np.clip(x, 1e-4, None)
)

ts = (t1,t2,t3)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [6]:
LinearRegression().fit(X_train, y_train).score(X_train, y_train)

0.5015516188475683

In [7]:
LinearRegression().fit(X_train, y_train).score(X_test, y_test)

0.5675895725793205

In [8]:
best_score = -cross_val_score(LinearRegression(), X_train, y_train, cv=5, scoring="neg_root_mean_squared_error").mean()
best_score

55.62131982131099

In [9]:
to_add = None
transformations = []
for t in ts:
    tX = t(X_train)
    filter_ = SelectKBest(k="all").fit(X_train, y_train)
    cols = X_train.columns[ filter_.pvalues_ <= 0.05]
    
    if to_add is None:
        to_add = t(X_train[cols])
    else:
        to_add = pd.concat(
            (to_add, t(X_train[cols])) , axis=1
        )
    transformations.append(cols)
        

In [10]:
to_add.shape

(353, 15)

In [11]:
transformations

[Index(['bmi', 'bp', 's3', 's4', 's5'], dtype='object'),
 Index(['bmi', 'bp', 's3', 's4', 's5'], dtype='object'),
 Index(['bmi', 'bp', 's3', 's4', 's5'], dtype='object')]

In [12]:
X_train = pd.concat((X_train, to_add), axis=1)

In [13]:
lr = LinearRegression().fit(X_train, y_train)
lr.score(X_train, y_train)

0.5256439936785254

In [14]:
X_test = pd.concat( 
    (X_test,) + tuple( t(X_test[cols]) for t, cols in zip(ts, transformations) ), axis=1
)

In [15]:
lr.score(X_test, y_test)

0.5842822784567175