In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import PolynomialFeatures

In [None]:
import itertools

class FormulaFeatureGenerator():
    """Expand a 2-D array into monomial features ``x_0^e0 * x_1^e1 * ...``.

    For every input column one exponent is chosen from ``exponents``; each
    possible assignment of exponents to columns (their Cartesian product)
    becomes one output feature. An input with ``n`` columns therefore yields
    ``len(exponents) ** n`` features.

    Zeros in the input are replaced by ``small_number`` on an internal copy
    before exponentiation so that negative exponents never divide by zero.
    The caller's array is never modified.

    Parameters
    ----------
    exponents : sequence of numbers, optional
        Exponents each variable may take. Defaults to ``[-1, -.5, 0, 1, 2]``.
    small_number : float, optional
        Replacement value for exact zeros in the input. Defaults to ``0.5``.

    Attributes
    ----------
    var_list : np.ndarray
        Column indices seen by ``fit``.
    combinations : list of tuple
        One tuple per output feature; each holds ``(column, exponent)`` pairs.
    """

    # Kept immutable so that instances can never share (and accidentally
    # mutate) a single default list.
    DEFAULT_EXPONENTS = (-1, -.5, 0, 1, 2)

    def __init__(self, exponents: list = None, small_number: float = 0.5):
        if exponents is None:
            exponents = self.DEFAULT_EXPONENTS
        # Private copy: later edits to the caller's list do not leak in.
        self.exponents = list(exponents)
        self.small_number = small_number

    def fit(self, x: np.ndarray):
        """Enumerate all exponent combinations for the columns of ``x``.

        Only the number of columns of ``x`` is used. Returns ``self`` so calls
        can be chained.
        """
        x = np.asarray(x)
        self.var_list = np.arange(x.shape[1])
        var_exps_lists = [[(v, e) for e in self.exponents] for v in self.var_list]
        # itertools.product varies the last variable's exponent fastest.
        self.combinations = list(itertools.product(*var_exps_lists))
        return self

    def _check_is_fitted(self):
        """Raise ``AssertionError`` if ``fit`` has not been called yet."""
        # Raised explicitly (not via ``assert``) so the guard survives ``python -O``.
        if not hasattr(self, 'combinations'):
            raise AssertionError('call fit method first')

    def get_feature_names(self):
        """Return one label per output feature, e.g. ``'x_0^1*x_1^0'``.

        Labels are in the same order as the columns produced by ``transform``.

        Raises
        ------
        AssertionError
            If ``fit`` has not been called.
        """
        self._check_is_fitted()
        return ['*'.join(f'x_{var}^{exp}' for var, exp in combo)
                for combo in self.combinations]

    def _prepare_input(self, x) -> np.ndarray:
        """Return a float copy of ``x`` with exact zeros set to ``small_number``."""
        # np.array(..., dtype=float) always copies, which both protects the
        # caller's data and lets the fractional replacement survive for
        # integer input (assigning 0.5 into an int array would truncate to 0).
        x_safe = np.array(x, dtype=float)
        x_safe[x_safe == 0] = self.small_number
        return x_safe

    @staticmethod
    def _monomial(x_safe: np.ndarray, tup: tuple) -> np.ndarray:
        """Evaluate one ``(column, exponent)`` combination on prepared input."""
        cols = np.array([col for col, _ in tup], dtype=int)
        exps = np.array([exp for _, exp in tup], dtype=float)
        # Row-wise product of the selected columns raised to their exponents.
        # A negative base with a fractional exponent yields NaN here.
        return np.prod(x_safe[:, cols] ** exps, axis=1)

    def _create_single_feature(self, x: np.ndarray, tup: tuple):
        """Compute a single feature column for ``x`` without modifying ``x``.

        ``tup`` is one entry of ``self.combinations``. Returns a 1-D array with
        one value per row of ``x``.
        """
        return self._monomial(self._prepare_input(x), tup)

    def transform(self, x: np.ndarray):
        """Return the ``(n_rows, n_features)`` matrix of generated features.

        Raises
        ------
        AssertionError
            If ``fit`` has not been called.
        """
        self._check_is_fitted()
        # Zero replacement is done once here instead of once per feature.
        x_safe = self._prepare_input(x)
        x_new = np.zeros((x_safe.shape[0], len(self.combinations)))
        for i, tup in enumerate(self.combinations):
            x_new[:, i] = self._monomial(x_safe, tup)
        return x_new

    def fit_transform(self, x):
        """Fit on ``x`` and return its generated features."""
        return self.fit(x).transform(x)

In [None]:
# Unfitted generator restricted to exponents 0 and 1, i.e. every variable is
# either left out of a product (^0) or enters it linearly (^1).
ffg = FormulaFeatureGenerator(exponents=[0, 1])

In [None]:
# ffg = FormulaFeatureGenerator(exponents=[-1, 0, 1])
# x_new = ffg.fit_transform(x)
# pd.DataFrame(x_new, columns=ffg.get_feature_names())

In [None]:

# `combinations` is only created inside fit(), so this is False for a
# generator that has been constructed but not fitted yet.
hasattr(ffg, 'combinations')

In [None]:
# get_feature_names() requires fit() to have been called first. On a
# top-to-bottom run `ffg` is still unfitted at this point, so catch the
# AssertionError and show its message instead of aborting "Run All".
try:
    print(ffg.get_feature_names())
except AssertionError as err:
    print(f'AssertionError: {err}')

In [None]:
# Scratch check of NumPy's global RNG: draws two samples (uniform over [0, 1)
# according to the NumPy documentation).
np.random.rand(2)

In [None]:
# Synthetic data: four independent, uniformly distributed variables.
# A dedicated seeded generator makes every "Restart & Run All" reproduce the
# same samples, independent of any earlier use of the global NumPy RNG.
SEED = 0
N_SAMPLES = 10_000
rng = np.random.default_rng(SEED)

x0 = rng.uniform(50, 100, N_SAMPLES)
x1 = rng.uniform(10, 30, N_SAMPLES)
x2 = rng.uniform(10, 30, N_SAMPLES)
x3 = rng.uniform(10, 30, N_SAMPLES)

# Target to recover. Note this binds z to the same array object as x1.
z = x1

# Passing named columns gives the 3-D scatter labelled axes and colour bar.
px.scatter_3d(
    pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'z': z}),
    x='x1', y='x2', z='x3', color='z',
    title='Synthetic variables x1, x2, x3 coloured by target z',
)

In [None]:
# PolynomialFeatures is already imported in the first cell and is not used
# here, so only linear_model needs to be brought into scope.
from sklearn import linear_model


# Design matrix with one column per raw variable.
X = np.column_stack((x0, x1, x2, x3))

# Expand into every product of the variables with exponents 0 or 1
# (2**4 = 16 candidate terms). This rebinds `ffg` to a fitted generator.
ffg = FormulaFeatureGenerator(exponents=[0, 1])
X_ = ffg.fit_transform(X)

# Lasso is used so that irrelevant candidate terms get zero coefficients.
# `clf` is a regressor despite its name.
clf = linear_model.Lasso(max_iter=100000)
clf.fit(X_, z)
# R^2 evaluated on the training data itself.
clf.score(X_, z)

In [None]:
# Labels of the generated features, in the same order as the columns of X_.
ffg.get_feature_names()

In [None]:
# Column 4 of the generated features. With exponents [0, 1] and four inputs
# the combinations are enumerated like a 4-bit counter (last variable varying
# fastest), so index 4 is 'x_0^0*x_1^1*x_2^0*x_3^0', i.e. x1 on its own.
X_[:,4]

In [None]:
# Raw x1 samples, shown for comparison with the feature column above.
x1

In [None]:
# Regression target; it was bound to x1 (z = x1) when the data was generated.
z

In [None]:
# Fitted Lasso coefficients, one per column of X_ (same order as
# ffg.get_feature_names()).
clf.coef_

In [None]:
# Feature labels again, to read alongside the coefficients above.
ffg.get_feature_names()

In [None]:
# Wrap the generated features in a DataFrame whose columns carry the labels
# returned by get_feature_names().
df = pd.DataFrame(X_, columns=ffg.get_feature_names())

In [None]:
# Column labels available for selection from df.
df.columns

In [None]:
# df's columns are labelled by ffg.get_feature_names(): one 'x_<var>^<exp>'
# term per input variable, joined by '*'. Labels such as 'x0^-1*x1^0' do not
# exist in this frame (different spelling, only two variables, and exponent -1
# is not among the fitted exponents [0, 1]), so selecting them raises KeyError.
# NOTE(review): summing the two single-variable features (x0 alone + x1 alone)
# is the closest available analogue; confirm this is the intended check.
df['x_0^1*x_1^0*x_2^0*x_3^0'] + df['x_0^0*x_1^1*x_2^0*x_3^0']

In [None]:
# Target values again, for comparison with the column expression above.
z

In [None]:
# (n_samples, n_features): four input columns with two exponents each give
# 2**4 = 16 generated features.
X_.shape

In [None]:
# First ten rows of the raw input matrix (columns are x0, x1, x2, x3).
X[0:10,:]