## PolynomialFeatures調査
- 動作を調べ、ラベル付けを行う

In [50]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
import sympy

## ダミーデータ作り

In [51]:
# Return the 1st through n-th prime numbers as a plain Python list.
def primes(n=0):
    """Collect the first ``n`` primes in ascending rank order.

    Args:
        n: How many primes to return. With the default of 0 the result is
            an empty list.

    Returns:
        list: ``[sympy.prime(1), ..., sympy.prime(n)]``. Note that this is a
        list, not a NumPy array; callers wrap it themselves when needed.
    """
    # np.arange(n) + 1 yields the 1-based ranks 1..n that sympy.prime expects.
    return [sympy.prime(rank) for rank in np.arange(n) + 1]

In [86]:
# Build a one-row DataFrame that assigns a distinct prime number to every column name.
def generate_df_prime_from_column(columns_original = np.array(["a","b","c"])):
    """Create a single-row frame whose i-th column holds the i-th prime.

    Args:
        columns_original: Sequence of column names. Its length decides how
            many primes are generated; the names become the frame's columns.

    Returns:
        pandas.DataFrame: One row labelled ``"original"`` with one column per
        entry of ``columns_original``.
    """
    column_count = len(columns_original)
    # Nesting the list once gives the 2-D (1, n) layout directly.
    prime_row = np.array([primes(column_count)])
    return pd.DataFrame(prime_row, index=["original"], columns=columns_original)

# Smoke test: render the frame built from the default column names (a, b, c).
display(generate_df_prime_from_column())

Unnamed: 0,a,b,c
original,2,3,5


## PolynomialFeaturesの動作を調べる
### PolynomialFeatures
- degree : integer
The degree of the polynomial features. Default = 2.
- interaction_only : boolean, default = False
If true, only interaction features are produced: features that are products of at most degree distinct input features (so not x[1] ** 2, x[0] * x[2] ** 3, etc.).
- include_bias : boolean
If True (default), then include a bias column, the feature in which all polynomial powers are zero (i.e. a column of ones - acts as an intercept term in a linear model).
- order : str in {‘C’, ‘F’}, default ‘C’
Order of output array in the dense case. ‘F’ order is faster to compute, but may slow down subsequent estimators.

from https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

### データフレームとしてPolynomialFeaturesの結果を返す関数
コラム名もa×bなど、何と何を掛けたものであるかが分かるようにしている

In [101]:
def investigate_PolynominalFeatures(poly=None,columns_from=["a","b","c"],power=False):
    """Label the output columns of a PolynomialFeatures transform.

    Every input column is given a distinct prime (via
    ``generate_df_prime_from_column``). Each transformed value is therefore a
    product of those primes, and factorising it shows which input columns
    were multiplied together to produce that output column.

    Args:
        poly: Transformer to investigate; it is fitted by this call. When
            ``None`` (default) a fresh ``PolynomialFeatures(2)`` is created
            for the call, so no instance is shared between calls.
        columns_from: Names of the input columns.
        power: If True, repeated factors are written as powers (``a^2×b``);
            otherwise every factor is spelled out (``a×a×b``).

    Returns:
        pandas.DataFrame: One row labelled ``"poly"`` holding the transformed
        values, with columns named after the factors of each value. A value
        of 1 is labelled ``"bias"``.

    Raises:
        KeyError: If a transformed value has a prime factor that does not
            belong to any input column.
    """
    if poly is None:
        # Created per call: a default instance in the signature would be a
        # single shared object that every default call re-fits.
        poly = PolynomialFeatures(2)

    df_from = generate_df_prime_from_column(columns_from)
    # Reverse lookup prime -> column name, built once instead of searching
    # the value list for every factor.
    prime_to_column = {
        int(prime): name for prime, name in zip(df_from.values[0], df_from.columns)
    }
    data_poly = poly.fit_transform(df_from)

    # Rebuild the column labels from the transformed values.
    columns_poly = []
    for value in data_poly[0]:
        # The transform yields floats, while sympy.factorint only accepts
        # true integers, so convert first.
        # NOTE(review): assumes each product is exactly representable as a
        # float; confirm for very high degrees or many columns.
        product = int(round(value))
        if product == 1:
            columns_poly.append("bias")
            continue
        factors = sympy.factorint(product)
        terms = []
        # Sorting the primes keeps the factors in input-column order without
        # relying on the key order of the returned dict.
        for prime in sorted(factors):
            name = prime_to_column[prime]
            exponent = factors[prime]
            if power:
                # Write repeated factors as a power (e.g. a^2).
                terms.append(name if exponent == 1 else name + "^" + str(exponent))
            else:
                # Spell out every factor (e.g. a×a×b).
                terms.extend([name] * exponent)
        columns_poly.append("×".join(terms))
    return pd.DataFrame(data=data_poly,columns=columns_poly,index=["poly"])

### 調査結果
- バイアス→1次の項→2次の項→・・・の順
- 2次の項の中では、a×(a→b→c)→b×(b→c)→c×cの順
- interaction_only = True (デフォルトはFalse)の時は、同じ変数が2回以上登場しない項のみとなる

In [100]:
# Each entry pairs the heading to print with the keyword arguments forwarded
# to investigate_PolynominalFeatures (an empty dict means "use the defaults").
report_cases = [
    ("degree=2のとき", {}),
    ("degree=2, interaction_only=Trueのとき",
     {"poly": PolynomialFeatures(degree=2,interaction_only=True)}),
    ("degree=2, interaction_only=True, include_bias=Falseのとき",
     {"poly": PolynomialFeatures(degree=2,interaction_only=True,include_bias=False)}),
    ("degree=3, interaction_only=False, include_bias=Falseのとき",
     {"poly": PolynomialFeatures(degree=3,include_bias=False), "power": True}),
    ("degree=3, interaction_only=True, include_bias=Falseのとき",
     {"poly": PolynomialFeatures(degree=3,interaction_only=True,include_bias=False)}),
]

for position, (heading, call_kwargs) in enumerate(report_cases):
    # A blank line separates consecutive reports; none before the first one.
    if position > 0:
        print("")
    print(heading)
    display(investigate_PolynominalFeatures(**call_kwargs))

degree=2のとき


Unnamed: 0,bias,a,b,c,a×a,a×b,a×c,b×b,b×c,c×c
poly,1.0,2.0,3.0,5.0,4.0,6.0,10.0,9.0,15.0,25.0



degree=2, interaction_only=Trueのとき


Unnamed: 0,bias,a,b,c,a×b,a×c,b×c
poly,1.0,2.0,3.0,5.0,6.0,10.0,15.0



degree=2, interaction_only=True, include_bias=Falseのとき


Unnamed: 0,a,b,c,a×b,a×c,b×c
poly,2.0,3.0,5.0,6.0,10.0,15.0



degree=3, interaction_only=False, include_bias=Falseのとき


Unnamed: 0,a,b,c,a^2,a×b,a×c,b^2,b×c,c^2,a^3,a^2×b,a^2×c,a×b^2,a×b×c,a×c^2,b^3,b^2×c,b×c^2,c^3
poly,2.0,3.0,5.0,4.0,6.0,10.0,9.0,15.0,25.0,8.0,12.0,20.0,18.0,30.0,50.0,27.0,45.0,75.0,125.0



degree=3, interaction_only=True, include_bias=Falseのとき


Unnamed: 0,a,b,c,a×b,a×c,b×c,a×b×c
poly,2.0,3.0,5.0,6.0,10.0,15.0,30.0
