In [78]:
from sympy import degree, symbols

from sympy import symbols, total_degree, degree
from sympy.polys.monomials import itermonomials

import numpy as np

r = 2
p = 3

# Symbols x1, ..., xr
symbol_names = [f'x{index}' for index in range(1, r + 1)]
vars = symbols(symbol_names)

print(vars)

# Enumerate every monomial in those symbols with total degree from 0 to 2*p
monomials_list = list(itermonomials(vars, max_degrees=2*p, min_degrees=0))

print(len(monomials_list))

# Keep a monomial only when all three conditions hold:
#   * its total degree is at least 3;
#   * if it involves more than one symbol, no symbol is raised above p
#     (single-symbol monomials may go all the way up to 2*p);
#   * it involves exactly one or two symbols.
monomials_list = [
    term
    for term in monomials_list
    if total_degree(term) >= 3
    and (len(term.free_symbols) <= 1
         or all(degree(term, gen) <= p for gen in term.free_symbols))
    and len(term.free_symbols) in (1, 2)
]

print(len(set(monomials_list)))
print(monomials_list)

[x1, x2]
28
16
[x1**3*x2**2, x1**6, x1**3, x2**3, x2**6, x1*x2**2, x1**2*x2**3, x1**5, x1**3*x2**3, x2**5, x1**2*x2, x1*x2**3, x1**4, x1**3*x2, x2**4, x1**2*x2**2]


In [79]:
from scipy.special import comb

def calculate_combinatorial(r, p):
    """
    Count the monomial features built from r variables with exponent bound p.

    Two families are counted:
      * pure powers of a single variable with exponent 3, ..., 2*p,
        i.e. r * (2*p - 2) terms;
      * products of two distinct variables x_i**a * x_j**b with
        1 <= a <= p, 1 <= b <= p and 3 <= a + b <= 2*p. Every admissible
        exponent pair (a, b) contributes comb(r, 2) terms, one per
        unordered choice of two variables.

    Returns the sum of both counts. comb() is used in its default
    floating-point mode, so the result is a float as soon as at least one
    exponent pair is admissible (and a plain int otherwise).
    """
    univariate_total = (2*p - 2) * r

    # One comb(r, 2) contribution per admissible (total degree, first exponent)
    # combination; the second exponent is total_deg - first_exp.
    bivariate_total = sum(
        comb(r, 2)
        for total_deg in range(3, 2*p + 1)
        for first_exp in range(1, min(p, total_deg - 1) + 1)
        if total_deg - first_exp <= p
    )

    return univariate_total + bivariate_total

# Example calculation for r=2, p=2 (note: this rebinds the notebook-level r and p).
# 4 single-variable terms + 3 two-variable terms = 7; it is displayed as 7.0
# because the two-variable count is accumulated from scipy's floating-point comb().
r = 2
p = 2
calculate_combinatorial(r, p)


7.0

In [100]:
def generate_polynomial_features(X, p, verbose=True):
    """
    Build the matrix of higher-degree monomial features of X.

    Column layout, in this order:
      1. Pure powers: for each variable i (outer loop), X[:, i]**d for
         d = 3, ..., 2*p (inner loop).
      2. Two-variable products: for each total degree d = 3, ..., 2*p, for
         each pair i < j, X[:, i]**a * X[:, j]**(d - a) with a ascending,
         keeping only exponents with 1 <= a <= p and 1 <= d - a <= p.

    The number of columns is taken from calculate_combinatorial(r, p).

    Parameters
    ----------
    X : array-like of shape (N, r)
        Input samples. The data is converted to float64 before any power is
        taken, so integer input cannot silently wrap around on overflow.
    p : int
        Exponent bound: two-variable terms use exponents up to p per
        variable, single-variable terms use exponents up to 2*p.
    verbose : bool, default True
        When true, print the number of generated columns.

    Returns
    -------
    numpy.ndarray of shape (N, total_terms), dtype float64.

    Raises
    ------
    ValueError
        If X is not two-dimensional (from unpacking X.shape).
    RuntimeError
        If fewer columns were produced than calculate_combinatorial
        announced, instead of silently returning all-zero columns.
    """
    # Work in floating point: integer arrays would overflow without warning
    # for large values (e.g. 100000**4 does not fit in int64).
    X = np.asarray(X, dtype=float)
    N, r = X.shape
    total_terms = int(calculate_combinatorial(r, p))
    if verbose:
        print("total_terms", total_terms)

    poly_features = np.zeros((N, total_terms))
    column = 0

    # Single-variable monomials: exponents 3 through 2*p for every variable.
    for i in range(r):
        for exponent in range(3, 2*p + 1):
            poly_features[:, column] = X[:, i] ** exponent
            column += 1

    # Two-variable monomials, grouped by total degree.
    for deg_sum in range(3, 2*p + 1):
        # Admissible exponents of the first variable: both exponents must lie
        # in 1..p and add up to deg_sum.
        lowest = max(1, deg_sum - p)
        highest = min(p, deg_sum - 1)
        for i in range(r):
            for j in range(i + 1, r):
                for deg_i in range(lowest, highest + 1):
                    deg_j = deg_sum - deg_i
                    poly_features[:, column] = (X[:, i] ** deg_i) * (X[:, j] ** deg_j)
                    column += 1

    if column != total_terms:
        raise RuntimeError(
            f"generated {column} feature columns but calculate_combinatorial "
            f"announced {total_terms}"
        )

    return poly_features

# Smoke test on a 2x2 integer matrix with p = 2
X_example = np.array([[1, 2], [3, 4]])
p_example = 2
poly_features_example = generate_polynomial_features(X_example, p_example)

print(poly_features_example.shape)
# Expected shape: (N, calculate_combinatorial(r, p)), where N is the number of rows of X
# and r its number of columns; for this input that is (2, 7).

print(X_example)

# The feature matrix is a float array; cast to int only to make the printout easier to read
# (this rebinds poly_features_example to the integer copy).
poly_features_example = poly_features_example.astype(int)
print(poly_features_example)


total_terms 7
(2, 7)
[[1 2]
 [3 4]]
[[  1   1   8  16   4   2   4]
 [ 27  81  64 256  48  36 144]]


In [34]:
# Larger run of the loop-based generator: 1,000,000 samples, 10 variables, p = 4.
# The result has 735 columns (see the printed shape); as a float64 array that is
# roughly 5.9 GB, so make sure the memory is available before running this cell.
# The random input is unseeded, so the values differ between runs; only the shape is checked.
# Note: this rebinds the notebook-level X, p and poly_features.
X = np.random.rand(1000000, 10)
p = 4
poly_features = generate_polynomial_features(X, p)
print(poly_features.shape)

total_terms 735
(1000000, 735)


In [82]:
def generate_polynomial_features_optimized(X, p):
    """
    Monomial feature matrix of X, built from a cache of element-wise powers.

    Produces the same columns, in the same order, as a direct evaluation:
      1. Pure powers: for each variable i (outer loop), X[:, i]**d for
         d = 3, ..., 2*p (inner loop).
      2. Two-variable products: for each total degree d = 3, ..., 2*p, for
         each pair i < j, X[:, i]**a * X[:, j]**(d - a) with a ascending,
         keeping only exponents with 1 <= a <= p and 1 <= d - a <= p.

    The number of columns is taken from calculate_combinatorial(r, p).

    Parameters
    ----------
    X : array-like of shape (N, r)
        Input samples. The data is converted to float64 before any power is
        taken, so integer input cannot silently wrap around on overflow.
    p : int
        Exponent bound: two-variable terms use exponents up to p per
        variable, single-variable terms use exponents up to 2*p.

    Returns
    -------
    numpy.ndarray of shape (N, total_terms), dtype float64.

    Raises
    ------
    ValueError
        If X is not two-dimensional (from unpacking X.shape).
    RuntimeError
        If fewer columns were produced than calculate_combinatorial
        announced, instead of silently returning all-zero columns.

    Notes
    -----
    The power cache holds 2*p arrays of the same shape as X.
    """
    # Work in floating point: integer arrays would overflow without warning
    # for large values (e.g. 100000**4 does not fit in int64).
    X = np.asarray(X, dtype=float)
    N, r = X.shape

    # powers[d] is X raised element-wise to d, for d = 1, ..., 2*p.
    powers = {deg: X ** deg for deg in range(1, 2*p + 1)}

    total_terms = int(calculate_combinatorial(r, p))
    poly_features = np.zeros((N, total_terms))
    column = 0

    # Single-variable monomials: exponents 3 through 2*p for every variable.
    for i in range(r):
        for exponent in range(3, 2*p + 1):
            poly_features[:, column] = powers[exponent][:, i]
            column += 1

    # Two-variable monomials, grouped by total degree.
    for deg_sum in range(3, 2*p + 1):
        # Admissible exponents of the first variable: both exponents must lie
        # in 1..p and add up to deg_sum.
        lowest = max(1, deg_sum - p)
        highest = min(p, deg_sum - 1)
        for i in range(r):
            for j in range(i + 1, r):
                for deg_i in range(lowest, highest + 1):
                    deg_j = deg_sum - deg_i
                    poly_features[:, column] = powers[deg_i][:, i] * powers[deg_j][:, j]
                    column += 1

    if column != total_terms:
        raise RuntimeError(
            f"generated {column} feature columns but calculate_combinatorial "
            f"announced {total_terms}"
        )

    return poly_features

# Example usage on a 2x2 integer matrix with p = 3
# (this rebinds X_example, p_example and poly_features_example from earlier cells)
X_example = np.array([[1, 2], [3, 4]])
p_example = 3
poly_features_example = generate_polynomial_features_optimized(X_example, p_example)
print(poly_features_example.shape)
# Expected: (N, calculate_combinatorial(r, p)) where N is the number of rows in X_example;
# for this input that is (2, 16).

# The matrix is printed as floats here (no integer cast), hence the scientific notation.
print(poly_features_example)

(2, 16)
[[1.000e+00 1.000e+00 1.000e+00 1.000e+00 8.000e+00 1.600e+01 3.200e+01
  6.400e+01 4.000e+00 2.000e+00 8.000e+00 4.000e+00 2.000e+00 8.000e+00
  4.000e+00 8.000e+00]
 [2.700e+01 8.100e+01 2.430e+02 7.290e+02 6.400e+01 2.560e+02 1.024e+03
  4.096e+03 4.800e+01 3.600e+01 1.920e+02 1.440e+02 1.080e+02 5.760e+02
  4.320e+02 1.728e+03]]


In [83]:
# Larger run of the optimized generator: 100,000 samples, 30 variables, p = 4.
# The result has 6705 columns (see the printed shape); as a float64 array that is
# roughly 5.4 GB, so make sure the memory is available before running this cell.
# The random input is unseeded, so the values differ between runs; only the shape is checked.
# Note: this rebinds the notebook-level X, p and poly_features.
X = np.random.rand(100000, 30)
p = 4
poly_features = generate_polynomial_features_optimized(X, p)
print(poly_features.shape)

(100000, 6705)


In [117]:
def generate_polynomial_features_optimized(X, p):
    """
    Monomial feature matrix of X, filled one block of columns at a time.

    Column layout, in this order:
      1. Pure powers, degree-major: for each d = 3, ..., 2*p, the r columns
         X[:, 0]**d, ..., X[:, r-1]**d.
      2. Two-variable products, exponent-major: for each exponent pair
         (a, b) with 1 <= a <= p, 1 <= b <= p and a + b >= 3 (a outer,
         b inner), one column X[:, i]**a * X[:, j]**b per variable pair
         i < j, in the order returned by np.triu_indices(r, k=1).

    This is the same set of columns as the loop-based generators that iterate
    variable-major / total-degree-major, but in a different column order.
    The number of columns is taken from calculate_combinatorial(r, p).

    Parameters
    ----------
    X : array-like of shape (N, r)
        Input samples. The data is converted to float64 before any power is
        taken, so integer input cannot silently wrap around on overflow.
    p : int
        Exponent bound: two-variable terms use exponents up to p per
        variable, single-variable terms use exponents up to 2*p.

    Returns
    -------
    numpy.ndarray of shape (N, total_terms), dtype float64.

    Raises
    ------
    ValueError
        If X is not two-dimensional (from unpacking X.shape).
    RuntimeError
        If fewer columns were produced than calculate_combinatorial
        announced, instead of silently returning all-zero columns.
    """
    # Work in floating point: integer arrays would overflow without warning
    # for large values (e.g. 100000**4 does not fit in int64).
    X = np.asarray(X, dtype=float)
    N, r = X.shape

    total_terms = int(calculate_combinatorial(r, p))
    poly_features = np.zeros((N, total_terms))
    column = 0

    # Single-variable monomials: one block of r columns per degree.
    for exponent in range(3, 2*p + 1):
        poly_features[:, column:column + r] = X ** exponent
        column += r

    # powers[e - 1] is X raised element-wise to e, for e = 1, ..., p.
    powers = [X ** exponent for exponent in range(1, p + 1)]

    # Index arrays enumerating every variable pair i < j.
    i_indices, j_indices = np.triu_indices(r, k=1)
    n_pairs = i_indices.size

    for deg_i in range(1, p + 1):
        # Loop-invariant for the inner loop: first factor for every pair.
        first_factor = powers[deg_i - 1][:, i_indices]
        for deg_j in range(1, p + 1):
            # Both exponents are at most p, so only the lower bound on the
            # total degree can fail (it excludes the plain product x_i * x_j).
            if deg_i + deg_j < 3:
                continue
            # Write the whole block of pair columns in one assignment.
            poly_features[:, column:column + n_pairs] = (
                first_factor * powers[deg_j - 1][:, j_indices]
            )
            column += n_pairs

    if column != total_terms:
        raise RuntimeError(
            f"generated {column} feature columns but calculate_combinatorial "
            f"announced {total_terms}"
        )

    return poly_features

# Example usage on a 2x2 integer matrix with p = 2
# (this rebinds X_example, p_example and poly_features_example from earlier cells)
# NOTE(review): the saved output of this cell starts with three printed column vectors
# that nothing in the current code prints - presumably left over from an earlier
# revision with debug prints; re-run the cell to refresh the output.
X_example = np.array([[1, 2], [3, 4]])
p_example = 2
poly_features_example = generate_polynomial_features_optimized(X_example, p_example)
print(f"Generated polynomial features shape: {poly_features_example.shape}")

# The feature matrix is a float array; cast to int only to make the printout easier to read
poly_features_example = poly_features_example.astype(int)
print(poly_features_example)


[[ 4]
 [48]]
[[ 2]
 [36]]
[[  4]
 [144]]
Generated polynomial features shape: (2, 7)
[[  1   8   1  16   4   2   4]
 [ 27  64  81 256  48  36 144]]


In [116]:
# Larger run of the optimized generator: 100,000 samples, 30 variables, p = 4.
# calculate_combinatorial(30, 4) is 6705, so the float64 result takes roughly 5.4 GB.
# The random input is unseeded, so the values differ between runs; nothing is printed here.
# NOTE(review): the saved output below this cell shows large printed arrays although this
# code prints nothing - presumably produced by an earlier revision of the function that
# still contained debug prints; re-run the cell to refresh the output.
X = np.random.rand(100000, 30)
p = 4
poly_features = generate_polynomial_features_optimized(X, p)

[[2.04648072e-01 4.95504806e-01 8.10181450e-01 ... 1.20811475e-01
  2.79310029e-01 1.24858909e-01]
 [4.44903707e-01 5.95071441e-05 3.02517938e-01 ... 1.14082797e-04
  1.33664111e-02 5.42509712e-04]
 [1.93627783e-01 2.92000853e-03 8.88202095e-03 ... 2.31559439e-01
  5.60435126e-01 2.84838343e-01]
 ...
 [1.40960727e-03 5.75286763e-02 2.20656291e-01 ... 1.90816185e-02
  5.76121875e-02 1.80962290e-01]
 [1.01112249e-02 7.56331585e-02 3.38068066e-02 ... 7.89593953e-01
  5.82029482e-01 6.89058319e-01]
 [3.69785704e-02 2.20338244e-01 2.47785170e-04 ... 2.37813771e-01
  1.04176272e-01 2.46791832e-01]]
[[9.72749590e-02 3.66489365e-01 7.66236812e-01 ... 4.56655388e-02
  1.60529960e-01 7.17611025e-02]
 [4.33301897e-01 6.70262953e-07 2.42950754e-01 ... 1.90147716e-06
  2.41147684e-03 9.78759069e-05]
 [1.41713866e-01 2.62444222e-04 1.39228623e-03 ... 1.13476958e-01
  4.27269581e-01 2.17157622e-01]
 ...
 [9.48728667e-05 2.47355402e-02 1.85810092e-01 ... 7.46755076e-03
  3.91766160e-02 1.23055389e-01]