In [3]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import PolynomialFeatures

# Interactions

In some cases, it makes sense to capture interactions between the features of the dataset.

In [5]:
# Load the full Pokémon table from the shared datasets folder.
df = pd.read_csv("../../datasets/pokemon.csv")

# Keep only the two columns whose interactions are explored below.
atk_df = df.loc[:, ["Attack", "Defense"]]

In [6]:
# Degree-2 expansion: keep the pure powers (interaction_only=False) and
# leave out the constant bias column (include_bias=False).
pf = PolynomialFeatures(
    degree=2,
    interaction_only=False,
    include_bias=False,
)

# Fit on the Attack/Defense columns, then expand those same rows.
res = pf.fit(atk_df).transform(atk_df)
res

array([[   49.,    49.,  2401.,  2401.,  2401.],
       [   62.,    63.,  3844.,  3906.,  3969.],
       [   82.,    83.,  6724.,  6806.,  6889.],
       ...,
       [  110.,    60., 12100.,  6600.,  3600.],
       [  160.,    60., 25600.,  9600.,  3600.],
       [  110.,   120., 12100., 13200., 14400.]])

## Polynomial Features

`PolynomialFeatures` generates a new feature matrix consisting of all polynomial combinations of the input features up to the given degree. For example, if an input sample is two-dimensional and of the form [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].

Parameters:

    degree (int): The maximum degree of the polynomial features.
    interaction_only (bool): If True, only interaction features are produced (products of distinct input features, no powers such as a^2). Without the bias column, the degree-2 output would be [a, b, ab].
    include_bias (bool): If True (default), include a bias column, i.e. the feature in which all polynomial powers are zero (a column of ones).

In this case, we get the polynomial interactions between the Attack and Defense of the Pokémon.

In [9]:
# One row per generated feature; each column holds the exponent applied to that input.
degree_labels = ["Attack_degree", "Defense_degree"]
pd.DataFrame(data=pf.powers_, columns=degree_labels)

Unnamed: 0,Attack_degree,Defense_degree
0,1,0
1,0,1
2,2,0
3,1,1
4,0,2


### pf.powers_

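`pf.powers_` tells us which combination of exponents was used to build each output feature: row *i* holds the exponent applied to each input feature in output feature *i*.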

In [12]:
# Human-readable labels for the five degree-2 features, in the order
# PolynomialFeatures emits them (see pf.powers_).
common_columns = [
    "Attack",
    "Defense",
    "Attack**2",
    "Attack x Defense",
    "Defense**2",
]

# Wrap the raw array in a labelled frame and preview the first rows.
intr_features = pd.DataFrame(data=res, columns=common_columns)
intr_features.head()

Unnamed: 0,Attack,Defense,Attack**2,Attack x Defense,Defense**2
0,49.0,49.0,2401.0,2401.0,2401.0
1,62.0,63.0,3844.0,3906.0,3969.0
2,82.0,83.0,6724.0,6806.0,6889.0
3,100.0,123.0,10000.0,12300.0,15129.0
4,52.0,43.0,2704.0,2236.0,1849.0


Here we can clearly see the interactions between the features.

In [11]:
# Three unseen (Attack, Defense) samples used to demonstrate transform() on new data.
new_df = pd.DataFrame(
    {
        "Attack": [95, 121, 77],
        "Defense": [75, 120, 60],
    }
)
new_df

Unnamed: 0,Attack,Defense
0,95,75
1,121,120
2,77,60


In [14]:
# Reuse the already-fitted transformer: transform() only, no re-fitting on new data.
new_res = pf.transform(new_df)

# Label the expanded columns with the same names used for the training features.
new_intr_features = pd.DataFrame(
    data=new_res,
    columns=common_columns,
)

In [18]:
# Display the polynomial features computed for the new samples.
new_intr_features

Unnamed: 0,Attack,Defense,Attack**2,Attack x Defense,Defense**2
0,95.0,75.0,9025.0,7125.0,5625.0
1,121.0,120.0,14641.0,14520.0,14400.0
2,77.0,60.0,5929.0,4620.0,3600.0


Previously, we used `fit_transform` on the "train dataset"; now we use `transform` on the new inputs.

### Let's try with degree=3

In [20]:
# Three input columns this time, so that a three-way interaction exists.
atck_def_speed = df.loc[:, ["Attack", "Defense", "Speed"]]

In [39]:
# Degree-3 expansion restricted to interaction terms (interaction_only=True),
# again without the constant bias column.
pf_3 = PolynomialFeatures(
    degree=3,
    interaction_only=True,
    include_bias=False,
)

# Fit on Attack/Defense/Speed, then expand those same rows.
res_3 = pf_3.fit(atck_def_speed).transform(atck_def_speed)
res_3

array([[4.90000e+01, 4.90000e+01, 4.50000e+01, ..., 2.20500e+03,
        2.20500e+03, 1.08045e+05],
       [6.20000e+01, 6.30000e+01, 6.00000e+01, ..., 3.72000e+03,
        3.78000e+03, 2.34360e+05],
       [8.20000e+01, 8.30000e+01, 8.00000e+01, ..., 6.56000e+03,
        6.64000e+03, 5.44480e+05],
       ...,
       [1.10000e+02, 6.00000e+01, 7.00000e+01, ..., 7.70000e+03,
        4.20000e+03, 4.62000e+05],
       [1.60000e+02, 6.00000e+01, 8.00000e+01, ..., 1.28000e+04,
        4.80000e+03, 7.68000e+05],
       [1.10000e+02, 1.20000e+02, 7.00000e+01, ..., 7.70000e+03,
        8.40000e+03, 9.24000e+05]])

In [41]:
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# `get_feature_names_out()` is its replacement. Building the labels from the
# fitted transformer keeps them in the same order as the columns of `res_3`
# instead of relying on a hand-maintained list.
#
# scikit-learn joins interacting inputs with a single space ("Attack Defense").
# The input column names here contain no spaces, so swapping that separator for
# " x " reproduces the labels used in this notebook ("Attack x Defense", ...).
columns_3 = [
    name.replace(" ", " x ")
    for name in pf_3.get_feature_names_out(atck_def_speed.columns)
]

In [42]:
# Labelled view of the degree-3 interaction features (pandas truncates the display).
pd.DataFrame(data=res_3, columns=columns_3)

Unnamed: 0,Attack,Defense,Speed,Attack x Defense,Attack x Speed,Defense x Speed,Attack x Defense x Speed
0,49.0,49.0,45.0,2401.0,2205.0,2205.0,108045.0
1,62.0,63.0,60.0,3906.0,3720.0,3780.0,234360.0
2,82.0,83.0,80.0,6806.0,6560.0,6640.0,544480.0
3,100.0,123.0,80.0,12300.0,8000.0,9840.0,984000.0
4,52.0,43.0,65.0,2236.0,3380.0,2795.0,145340.0
...,...,...,...,...,...,...,...
795,100.0,150.0,50.0,15000.0,5000.0,7500.0,750000.0
796,160.0,110.0,110.0,17600.0,17600.0,12100.0,1936000.0
797,110.0,60.0,70.0,6600.0,7700.0,4200.0,462000.0
798,160.0,60.0,80.0,9600.0,12800.0,4800.0,768000.0
