## Feature engineering manual

In [1]:
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import gc, os
import numpy as np
from tqdm import tqdm
from itertools import combinations

pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 200)

In [2]:
# Load the raw calories dataset; the path is relative to the notebook's working directory.
train = pd.read_csv('../data/raw/calories.csv')

In [3]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 9 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   User_ID     200000 non-null  int64  
 1   Gender      200000 non-null  object 
 2   Age         200000 non-null  int64  
 3   Height      200000 non-null  float64
 4   Weight      200000 non-null  float64
 5   Duration    200000 non-null  float64
 6   Heart_Rate  200000 non-null  float64
 7   Body_Temp   200000 non-null  float64
 8   Calories    200000 non-null  float64
dtypes: float64(6), int64(2), object(1)
memory usage: 13.7+ MB


In [4]:
# Numeric input columns that are combined into interaction terms and squared.
numerical_features = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']

def add_feature_cross_terms(df, numerical_features):
    """Append pairwise and three-way product columns to ``df``.

    Every 2-combination, and then every 3-combination, of
    ``numerical_features`` (in ``itertools.combinations`` order) yields one
    new column. The column is named with the feature names joined by ``_x_``
    and holds the element-wise product of the combined columns.

    Args:
        df: DataFrame containing every column named in ``numerical_features``.
            It is modified in place.
        numerical_features: Column names to combine.

    Returns:
        The same ``df`` object, with the product columns added.
    """
    # Second-order terms are written before third-order ones, which fixes the
    # order of the new columns.
    for order in (2, 3):
        for combo in combinations(numerical_features, order):
            head, *tail = combo
            product = df[head]
            # Multiply left to right: (a * b) * c.
            for name in tail:
                product = product * df[name]
            df['_x_'.join(f'{name}' for name in combo)] = product

    return df

def squares(df, features):
    """Add a ``<name>_2`` column with the square of each listed column.

    Args:
        df: DataFrame containing every column named in ``features``.
            It is modified in place.
        features: Column names to square.

    Returns:
        The same ``df`` object, with one squared column per feature.
    """
    for name in features:
        squared = df[name] ** 2
        df[f'{name}_2'] = squared

    return df

In [5]:
def preprocesamiento(df):
    """Add domain-specific features to ``df`` in place and return it.

    Reads the raw columns ``Gender``, ``Age``, ``Height``, ``Weight``,
    ``Duration``, ``Heart_Rate`` and ``Body_Temp``, plus the interaction
    columns ``Duration_x_Heart_Rate``, ``Weight_x_Duration`` and
    ``Age_x_Duration``, which must already exist on ``df``.

    Args:
        df: DataFrame to enrich. It is modified in place.

    Returns:
        The same ``df`` object, with the derived columns added and ``Gender``
        encoded as 1 for ``'male'`` and 0 otherwise.

    Raises:
        KeyError: If a required column is missing. The check runs before any
            column is written, so a failed call leaves ``df`` untouched.
    """
    required_columns = [
        'Gender', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp',
        'Duration_x_Heart_Rate', 'Weight_x_Duration', 'Age_x_Duration',
    ]
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f'preprocesamiento requires columns that are missing: {missing}')

    # ---- flags & basics ---------------------------
    # Only encode Gender while it still holds labels. Re-running on an already
    # encoded (numeric/bool) column would compare numbers to 'male' and
    # silently turn every row into 0.
    if not pd.api.types.is_numeric_dtype(df['Gender']):
        df['Gender'] = (df['Gender'] == 'male').astype('int8')
    # Height is divided by 100 before squaring (presumably cm -> m; confirm units).
    df['BMI'] = df['Weight'] / (df['Height'] / 100)**2
    # Two age-based reference heart rates: 220 - Age and 208 - 0.7 * Age.
    df['FCMT_simple'] = 220 - df['Age']
    df['FCMT_tanaka'] = 208 - (0.7 * df['Age'])
    # Heart rate as a percentage of each reference; 0 where the reference is not positive.
    df['Percent_FCMT_simple'] = np.where(df['FCMT_simple'] > 0, (df['Heart_Rate'] / df['FCMT_simple']) * 100, 0)
    df['Percent_FCMT_tanaka'] = np.where(df['FCMT_tanaka'] > 0, (df['Heart_Rate'] / df['FCMT_tanaka']) * 100, 0)
    # Clip the percentages to a reasonable range (0-150%) to avoid extreme values on noisy data.
    df['Percent_FCMT_simple'] = np.clip(df['Percent_FCMT_simple'], 0, 150)
    df['Percent_FCMT_tanaka'] = np.clip(df['Percent_FCMT_tanaka'], 0, 150)
    # Body temperature deviation from 37.0 (this one IS valid as a feature).
    df['Body_Temp_Deviation'] = df['Body_Temp'] - 37.0

    # Polynomial terms of the simple percentage.
    df['Pct_FCMT_sq'] = df['Percent_FCMT_simple']**2
    df['Pct_FCMT_cu'] = df['Percent_FCMT_simple']**3

    # log(1 + x) transforms
    for col in ['Duration', 'Heart_Rate', 'Body_Temp', 'Weight', 'Duration_x_Heart_Rate']:
        df[f'{col}_log'] = np.log1p(df[col])

    # Threshold flags.
    df['is_temp_high']  = (df['Body_Temp']>39).astype('int8')
    df['is_sobrep']  = (df['BMI']>27).astype('int8')

    # Gender-specific linear combination of duration-scaled terms.
    # NOTE(review): the coefficients resemble the Keytel et al. heart-rate
    # energy-expenditure equations, where the female weight term is negative
    # (-0.1263). Confirm whether the positive sign below is intentional.
    df['feno_var'] = np.where(df['Gender'] ==1,
                              -55.0969*df['Duration'] + 0.6309*df['Duration_x_Heart_Rate'] + 0.1988*df['Weight_x_Duration'] + 0.2017*df['Age_x_Duration'],
                              -20.4022*df['Duration'] + 0.4472*df['Duration_x_Heart_Rate'] + 0.1263*df['Weight_x_Duration'] + 0.074*df['Age_x_Duration']
                             )

    return df

In [6]:
# Build all engineered features. Every stage's return value is bound back to
# `train`, so the cell does not depend on the helpers mutating their argument.
train = (
    train
    .pipe(add_feature_cross_terms, numerical_features)
    .pipe(squares, numerical_features)
    .pipe(preprocesamiento)
)
train

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Age_x_Height,Age_x_Weight,Age_x_Duration,Age_x_Heart_Rate,Age_x_Body_Temp,Height_x_Weight,Height_x_Duration,Height_x_Heart_Rate,Height_x_Body_Temp,Weight_x_Duration,Weight_x_Heart_Rate,Weight_x_Body_Temp,Duration_x_Heart_Rate,Duration_x_Body_Temp,Heart_Rate_x_Body_Temp,Age_x_Height_x_Weight,Age_x_Height_x_Duration,Age_x_Height_x_Heart_Rate,Age_x_Height_x_Body_Temp,Age_x_Weight_x_Duration,Age_x_Weight_x_Heart_Rate,Age_x_Weight_x_Body_Temp,Age_x_Duration_x_Heart_Rate,Age_x_Duration_x_Body_Temp,Age_x_Heart_Rate_x_Body_Temp,Height_x_Weight_x_Duration,Height_x_Weight_x_Heart_Rate,Height_x_Weight_x_Body_Temp,Height_x_Duration_x_Heart_Rate,Height_x_Duration_x_Body_Temp,Height_x_Heart_Rate_x_Body_Temp,Weight_x_Duration_x_Heart_Rate,Weight_x_Duration_x_Body_Temp,Weight_x_Heart_Rate_x_Body_Temp,Duration_x_Heart_Rate_x_Body_Temp,Age_2,Height_2,Weight_2,Duration_2,Heart_Rate_2,Body_Temp_2,BMI,FCMT_simple,FCMT_tanaka,Percent_FCMT_simple,Percent_FCMT_tanaka,Body_Temp_Deviation,Pct_FCMT_sq,Pct_FCMT_cu,Duration_log,Heart_Rate_log,Body_Temp_log,Weight_log,Duration_x_Heart_Rate_log,is_temp_high,is_sobrep,feno_var
0,301957,0,21,188.0,84.0,16.0,100.0,40.4,80.0,3948.0,1764.0,336.0,2100.0,848.4,15792.0,3008.0,18800.0,7595.2,1344.0,8400.0,3393.6,1600.0,646.4,4040.0,331632.0,63168.0,394800.0,159499.2,28224.0,176400.0,71265.6,33600.0,13574.4,84840.0,252672.0,1579200.0,637996.8,300800.0,121523.2,759520.0,134400.0,54297.6,339360.0,64640.0,441,35344.0,7056.0,256.0,10000.0,1632.16,23.766410,199,193.3,50.251256,51.733057,3.4,2525.188758,126893.907430,2.833213,4.615121,3.723281,4.442651,7.378384,1,0,583.6960
1,407676,0,48,165.0,68.0,11.0,91.0,40.2,54.0,7920.0,3264.0,528.0,4368.0,1929.6,11220.0,1815.0,15015.0,6633.0,748.0,6188.0,2733.6,1001.0,442.2,3658.2,538560.0,87120.0,720720.0,318384.0,35904.0,297024.0,131212.8,48048.0,21225.6,175593.6,123420.0,1021020.0,451044.0,165165.0,72963.0,603603.0,68068.0,30069.6,248757.6,40240.2,2304,27225.0,4624.0,121.0,8281.0,1616.04,24.977043,172,174.4,52.906977,52.178899,3.2,2799.148188,148094.468097,2.484907,4.521789,3.718438,4.234107,6.909753,1,0,356.7674
2,244464,1,28,178.0,77.0,23.0,103.0,40.5,125.0,4984.0,2156.0,644.0,2884.0,1134.0,13706.0,4094.0,18334.0,7209.0,1771.0,7931.0,3118.5,2369.0,931.5,4171.5,383768.0,114632.0,513352.0,201852.0,49588.0,222068.0,87318.0,66332.0,26082.0,116802.0,315238.0,1411718.0,555093.0,421682.0,165807.0,742527.0,182413.0,71725.5,321205.5,95944.5,784,31684.0,5929.0,529.0,10609.0,1640.25,24.302487,192,188.4,53.645833,54.670913,3.5,2877.875434,154386.025888,3.178054,4.644391,3.725693,4.356709,7.770645,1,0,709.3430
3,38748,0,32,156.0,54.0,29.0,105.0,41.1,179.0,4992.0,1728.0,928.0,3360.0,1315.2,8424.0,4524.0,16380.0,6411.6,1566.0,5670.0,2219.4,3045.0,1191.9,4315.5,269568.0,144768.0,524160.0,205171.2,50112.0,181440.0,71020.8,97440.0,38140.8,138096.0,244296.0,884520.0,346226.4,475020.0,185936.4,673218.0,164430.0,64362.6,233037.0,125149.5,1024,24336.0,2916.0,841.0,11025.0,1689.21,22.189349,188,185.6,55.851064,56.573276,4.1,3119.341331,174218.531780,3.401197,4.663439,3.740048,4.007333,8.021585,1,0,1036.5180
4,297351,1,24,172.0,75.0,6.0,77.0,39.4,9.0,4128.0,1800.0,144.0,1848.0,945.6,12900.0,1032.0,13244.0,6776.8,450.0,5775.0,2955.0,462.0,236.4,3033.8,309600.0,24768.0,317856.0,162643.2,10800.0,138600.0,70920.0,11088.0,5673.6,72811.2,77400.0,993300.0,508260.0,79464.0,40660.8,521813.6,34650.0,17730.0,227535.0,18202.8,576,29584.0,5625.0,36.0,5929.0,1552.36,25.351541,196,191.2,39.285714,40.271967,2.4,1543.367347,60632.288630,1.945910,4.356709,3.698830,4.330733,6.137727,1,0,79.3992
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,284405,1,52,186.0,85.0,15.0,100.0,40.6,94.0,9672.0,4420.0,780.0,5200.0,2111.2,15810.0,2790.0,18600.0,7551.6,1275.0,8500.0,3451.0,1500.0,609.0,4060.0,822120.0,145080.0,967200.0,392683.2,66300.0,442000.0,179452.0,78000.0,31668.0,211120.0,237150.0,1581000.0,641886.0,279000.0,113274.0,755160.0,127500.0,51765.0,345100.0,60900.0,2704,34596.0,7225.0,225.0,10000.0,1648.36,24.569314,168,171.6,59.523810,58.275058,3.6,3543.083900,210897.851204,2.772589,4.615121,3.728100,4.454347,7.313887,1,0,530.6925
199996,294812,0,44,161.0,62.0,27.0,99.0,40.7,152.0,7084.0,2728.0,1188.0,4356.0,1790.8,9982.0,4347.0,15939.0,6552.7,1674.0,6138.0,2523.4,2673.0,1098.9,4029.3,439208.0,191268.0,701316.0,288318.8,73656.0,270072.0,111029.6,117612.0,48351.6,177289.2,269514.0,988218.0,406267.4,430353.0,176922.9,648717.3,165726.0,68131.8,249816.6,108791.1,1936,25921.0,3844.0,729.0,9801.0,1656.49,23.918830,176,177.2,56.250000,55.869074,3.7,3164.062500,177978.515625,3.332205,4.605170,3.730501,4.143135,7.891331,1,0,943.8444
199997,496827,1,78,176.0,78.0,22.0,104.0,40.8,173.0,13728.0,6084.0,1716.0,8112.0,3182.4,13728.0,3872.0,18304.0,7180.8,1716.0,8112.0,3182.4,2288.0,897.6,4243.2,1070784.0,302016.0,1427712.0,560102.4,133848.0,632736.0,248227.2,178464.0,70012.8,330969.6,302016.0,1427712.0,560102.4,402688.0,157977.6,746803.2,178464.0,70012.8,330969.6,93350.4,6084,30976.0,6084.0,484.0,10816.0,1664.64,25.180785,142,153.4,73.239437,67.796610,3.8,5364.015076,392857.442213,3.135494,4.653960,3.732896,4.369448,7.735870,1,0,918.6254
199998,521026,0,54,161.0,64.0,11.0,96.0,39.9,60.0,8694.0,3456.0,594.0,5184.0,2154.6,10304.0,1771.0,15456.0,6423.9,704.0,6144.0,2553.6,1056.0,438.9,3830.4,556416.0,95634.0,834624.0,346890.6,38016.0,331776.0,137894.4,57024.0,23700.6,206841.6,113344.0,989184.0,411129.6,170016.0,70662.9,616694.4,67584.0,28089.6,245145.6,42134.4,2916,25921.0,4096.0,121.0,9216.0,1592.01,24.690405,166,170.2,57.831325,56.404230,2.9,3344.462186,193414.680642,2.484907,4.574711,3.711130,4.174387,6.963190,1,0,380.6902


In [7]:
train.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Age_x_Height,Age_x_Weight,Age_x_Duration,Age_x_Heart_Rate,Age_x_Body_Temp,Height_x_Weight,Height_x_Duration,Height_x_Heart_Rate,Height_x_Body_Temp,Weight_x_Duration,Weight_x_Heart_Rate,Weight_x_Body_Temp,Duration_x_Heart_Rate,Duration_x_Body_Temp,Heart_Rate_x_Body_Temp,Age_x_Height_x_Weight,Age_x_Height_x_Duration,Age_x_Height_x_Heart_Rate,Age_x_Height_x_Body_Temp,Age_x_Weight_x_Duration,Age_x_Weight_x_Heart_Rate,Age_x_Weight_x_Body_Temp,Age_x_Duration_x_Heart_Rate,Age_x_Duration_x_Body_Temp,Age_x_Heart_Rate_x_Body_Temp,Height_x_Weight_x_Duration,Height_x_Weight_x_Heart_Rate,Height_x_Weight_x_Body_Temp,Height_x_Duration_x_Heart_Rate,Height_x_Duration_x_Body_Temp,Height_x_Heart_Rate_x_Body_Temp,Weight_x_Duration_x_Heart_Rate,Weight_x_Duration_x_Body_Temp,Weight_x_Heart_Rate_x_Body_Temp,Duration_x_Heart_Rate_x_Body_Temp,Age_2,Height_2,Weight_2,Duration_2,Heart_Rate_2,Body_Temp_2,BMI,FCMT_simple,FCMT_tanaka,Percent_FCMT_simple,Percent_FCMT_tanaka,Body_Temp_Deviation,Pct_FCMT_sq,Pct_FCMT_cu,Duration_log,Heart_Rate_log,Body_Temp_log,Weight_log,Duration_x_Heart_Rate_log,is_temp_high,is_sobrep,feno_var
0,301957,0,21,188.0,84.0,16.0,100.0,40.4,80.0,3948.0,1764.0,336.0,2100.0,848.4,15792.0,3008.0,18800.0,7595.2,1344.0,8400.0,3393.6,1600.0,646.4,4040.0,331632.0,63168.0,394800.0,159499.2,28224.0,176400.0,71265.6,33600.0,13574.4,84840.0,252672.0,1579200.0,637996.8,300800.0,121523.2,759520.0,134400.0,54297.6,339360.0,64640.0,441,35344.0,7056.0,256.0,10000.0,1632.16,23.76641,199,193.3,50.251256,51.733057,3.4,2525.188758,126893.90743,2.833213,4.615121,3.723281,4.442651,7.378384,1,0,583.696
1,407676,0,48,165.0,68.0,11.0,91.0,40.2,54.0,7920.0,3264.0,528.0,4368.0,1929.6,11220.0,1815.0,15015.0,6633.0,748.0,6188.0,2733.6,1001.0,442.2,3658.2,538560.0,87120.0,720720.0,318384.0,35904.0,297024.0,131212.8,48048.0,21225.6,175593.6,123420.0,1021020.0,451044.0,165165.0,72963.0,603603.0,68068.0,30069.6,248757.6,40240.2,2304,27225.0,4624.0,121.0,8281.0,1616.04,24.977043,172,174.4,52.906977,52.178899,3.2,2799.148188,148094.468097,2.484907,4.521789,3.718438,4.234107,6.909753,1,0,356.7674
2,244464,1,28,178.0,77.0,23.0,103.0,40.5,125.0,4984.0,2156.0,644.0,2884.0,1134.0,13706.0,4094.0,18334.0,7209.0,1771.0,7931.0,3118.5,2369.0,931.5,4171.5,383768.0,114632.0,513352.0,201852.0,49588.0,222068.0,87318.0,66332.0,26082.0,116802.0,315238.0,1411718.0,555093.0,421682.0,165807.0,742527.0,182413.0,71725.5,321205.5,95944.5,784,31684.0,5929.0,529.0,10609.0,1640.25,24.302487,192,188.4,53.645833,54.670913,3.5,2877.875434,154386.025888,3.178054,4.644391,3.725693,4.356709,7.770645,1,0,709.343
3,38748,0,32,156.0,54.0,29.0,105.0,41.1,179.0,4992.0,1728.0,928.0,3360.0,1315.2,8424.0,4524.0,16380.0,6411.6,1566.0,5670.0,2219.4,3045.0,1191.9,4315.5,269568.0,144768.0,524160.0,205171.2,50112.0,181440.0,71020.8,97440.0,38140.8,138096.0,244296.0,884520.0,346226.4,475020.0,185936.4,673218.0,164430.0,64362.6,233037.0,125149.5,1024,24336.0,2916.0,841.0,11025.0,1689.21,22.189349,188,185.6,55.851064,56.573276,4.1,3119.341331,174218.53178,3.401197,4.663439,3.740048,4.007333,8.021585,1,0,1036.518
4,297351,1,24,172.0,75.0,6.0,77.0,39.4,9.0,4128.0,1800.0,144.0,1848.0,945.6,12900.0,1032.0,13244.0,6776.8,450.0,5775.0,2955.0,462.0,236.4,3033.8,309600.0,24768.0,317856.0,162643.2,10800.0,138600.0,70920.0,11088.0,5673.6,72811.2,77400.0,993300.0,508260.0,79464.0,40660.8,521813.6,34650.0,17730.0,227535.0,18202.8,576,29584.0,5625.0,36.0,5929.0,1552.36,25.351541,196,191.2,39.285714,40.271967,2.4,1543.367347,60632.28863,1.94591,4.356709,3.69883,4.330733,6.137727,1,0,79.3992


In [8]:
train.shape

(200000, 66)

* Crear las funciones de preprocesamiento en features.py
* Guardar los archivos resultantes en `data/interim`

## Usando las funciones de features.py

In [10]:
import pandas as pd
from package_ml import features as fe

from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
import joblib

In [11]:
# Reload the raw training data from scratch. Age is forced to float64 rather than
# the inferred int64 (presumably so Age-derived features are float — TODO confirm).
train = pd.read_csv('../data/raw/calories.csv', dtype={'Age': 'float64'})

In [12]:
# Load the raw test data with the same Age dtype override used for the training file.
test = pd.read_csv('../data/raw/test.csv', dtype={'Age': 'float64'})

In [13]:
# Feature-engineering pipeline: each stage wraps a plain function from
# package_ml.features in a FunctionTransformer; stages run in the listed order.
fe_pipeline = Pipeline(
    steps=[
        ('cross', FunctionTransformer(fe.add_feature_cross_terms)),
        # Disabled stage, kept for reference:
        # ('inter', FunctionTransformer(fe.add_interaction_features,
        #                              kw_args={'features': fe.NUM_COLS})),
        (
            'squares',
            FunctionTransformer(fe.squares, kw_args={'features': fe.NUM_COLS}),
        ),
        ('domain', FunctionTransformer(fe.preprocessing)),
    ]
)

In [14]:
# Fit the feature-engineering pipeline on the training frame and build its features.
# NOTE(review): if the package_ml functions mutate their input, `train` itself is
# modified here as well — confirm against package_ml.features.
df_train = fe_pipeline.fit_transform(train)

In [15]:
df_train.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Age_x_Height,Age_x_Weight,Age_x_Duration,Age_x_Heart_Rate,Age_x_Body_Temp,Height_x_Weight,Height_x_Duration,Height_x_Heart_Rate,Height_x_Body_Temp,Weight_x_Duration,Weight_x_Heart_Rate,Weight_x_Body_Temp,Duration_x_Heart_Rate,Duration_x_Body_Temp,Heart_Rate_x_Body_Temp,Age_x_Height_x_Weight,Age_x_Height_x_Duration,Age_x_Height_x_Heart_Rate,Age_x_Height_x_Body_Temp,Age_x_Weight_x_Duration,Age_x_Weight_x_Heart_Rate,Age_x_Weight_x_Body_Temp,Age_x_Duration_x_Heart_Rate,Age_x_Duration_x_Body_Temp,Age_x_Heart_Rate_x_Body_Temp,Height_x_Weight_x_Duration,Height_x_Weight_x_Heart_Rate,Height_x_Weight_x_Body_Temp,Height_x_Duration_x_Heart_Rate,Height_x_Duration_x_Body_Temp,Height_x_Heart_Rate_x_Body_Temp,Weight_x_Duration_x_Heart_Rate,Weight_x_Duration_x_Body_Temp,Weight_x_Heart_Rate_x_Body_Temp,Duration_x_Heart_Rate_x_Body_Temp,Age_2,Height_2,Weight_2,Duration_2,Heart_Rate_2,Body_Temp_2,BMI,FCMT_simple,FCMT_tanaka,Percent_FCMT_simple,Percent_FCMT_tanaka,Body_Temp_Deviation,Duration_div_Body_Temp,Duration_log,Heart_Rate_log,Body_Temp_log,Weight_log,Duration_x_Heart_Rate_log,is_temp_high,is_overweight,feno_var
0,301957,False,21.0,188.0,84.0,16.0,100.0,40.4,80.0,3948.0,1764.0,336.0,2100.0,848.4,15792.0,3008.0,18800.0,7595.2,1344.0,8400.0,3393.6,1600.0,646.4,4040.0,331632.0,63168.0,394800.0,159499.2,28224.0,176400.0,71265.6,33600.0,13574.4,84840.0,252672.0,1579200.0,637996.8,300800.0,121523.2,759520.0,134400.0,54297.6,339360.0,64640.0,441.0,35344.0,7056.0,256.0,10000.0,1632.16,23.76641,199.0,193.3,50.251256,51.733057,3.4,0.39604,2.833213,4.615121,3.723281,4.442651,7.378384,True,False,583.696
1,407676,False,48.0,165.0,68.0,11.0,91.0,40.2,54.0,7920.0,3264.0,528.0,4368.0,1929.6,11220.0,1815.0,15015.0,6633.0,748.0,6188.0,2733.6,1001.0,442.2,3658.2,538560.0,87120.0,720720.0,318384.0,35904.0,297024.0,131212.8,48048.0,21225.6,175593.6,123420.0,1021020.0,451044.0,165165.0,72963.0,603603.0,68068.0,30069.6,248757.6,40240.2,2304.0,27225.0,4624.0,121.0,8281.0,1616.04,24.977043,172.0,174.4,52.906977,52.178899,3.2,0.273632,2.484907,4.521789,3.718438,4.234107,6.909753,True,False,356.7674
2,244464,True,28.0,178.0,77.0,23.0,103.0,40.5,125.0,4984.0,2156.0,644.0,2884.0,1134.0,13706.0,4094.0,18334.0,7209.0,1771.0,7931.0,3118.5,2369.0,931.5,4171.5,383768.0,114632.0,513352.0,201852.0,49588.0,222068.0,87318.0,66332.0,26082.0,116802.0,315238.0,1411718.0,555093.0,421682.0,165807.0,742527.0,182413.0,71725.5,321205.5,95944.5,784.0,31684.0,5929.0,529.0,10609.0,1640.25,24.302487,192.0,188.4,53.645833,54.670913,3.5,0.567901,3.178054,4.644391,3.725693,4.356709,7.770645,True,False,709.343
3,38748,False,32.0,156.0,54.0,29.0,105.0,41.1,179.0,4992.0,1728.0,928.0,3360.0,1315.2,8424.0,4524.0,16380.0,6411.6,1566.0,5670.0,2219.4,3045.0,1191.9,4315.5,269568.0,144768.0,524160.0,205171.2,50112.0,181440.0,71020.8,97440.0,38140.8,138096.0,244296.0,884520.0,346226.4,475020.0,185936.4,673218.0,164430.0,64362.6,233037.0,125149.5,1024.0,24336.0,2916.0,841.0,11025.0,1689.21,22.189349,188.0,185.6,55.851064,56.573276,4.1,0.705596,3.401197,4.663439,3.740048,4.007333,8.021585,True,False,1036.518
4,297351,True,24.0,172.0,75.0,6.0,77.0,39.4,9.0,4128.0,1800.0,144.0,1848.0,945.6,12900.0,1032.0,13244.0,6776.8,450.0,5775.0,2955.0,462.0,236.4,3033.8,309600.0,24768.0,317856.0,162643.2,10800.0,138600.0,70920.0,11088.0,5673.6,72811.2,77400.0,993300.0,508260.0,79464.0,40660.8,521813.6,34650.0,17730.0,227535.0,18202.8,576.0,29584.0,5625.0,36.0,5929.0,1552.36,25.351541,196.0,191.2,39.285714,40.271967,2.4,0.152284,1.94591,4.356709,3.69883,4.330733,6.137727,True,False,79.3992


In [16]:
# The pipeline is fitted on the training frame only; the test set is just
# transformed so that nothing is ever (re-)fitted on test data.
df_test = fe_pipeline.transform(test)

In [17]:
df_test.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Age_x_Height,Age_x_Weight,Age_x_Duration,Age_x_Heart_Rate,Age_x_Body_Temp,Height_x_Weight,Height_x_Duration,Height_x_Heart_Rate,Height_x_Body_Temp,Weight_x_Duration,Weight_x_Heart_Rate,Weight_x_Body_Temp,Duration_x_Heart_Rate,Duration_x_Body_Temp,Heart_Rate_x_Body_Temp,Age_x_Height_x_Weight,Age_x_Height_x_Duration,Age_x_Height_x_Heart_Rate,Age_x_Height_x_Body_Temp,Age_x_Weight_x_Duration,Age_x_Weight_x_Heart_Rate,Age_x_Weight_x_Body_Temp,Age_x_Duration_x_Heart_Rate,Age_x_Duration_x_Body_Temp,Age_x_Heart_Rate_x_Body_Temp,Height_x_Weight_x_Duration,Height_x_Weight_x_Heart_Rate,Height_x_Weight_x_Body_Temp,Height_x_Duration_x_Heart_Rate,Height_x_Duration_x_Body_Temp,Height_x_Heart_Rate_x_Body_Temp,Weight_x_Duration_x_Heart_Rate,Weight_x_Duration_x_Body_Temp,Weight_x_Heart_Rate_x_Body_Temp,Duration_x_Heart_Rate_x_Body_Temp,Age_2,Height_2,Weight_2,Duration_2,Heart_Rate_2,Body_Temp_2,BMI,FCMT_simple,FCMT_tanaka,Percent_FCMT_simple,Percent_FCMT_tanaka,Body_Temp_Deviation,Duration_div_Body_Temp,Duration_log,Heart_Rate_log,Body_Temp_log,Weight_log,Duration_x_Heart_Rate_log,is_temp_high,is_overweight,feno_var
0,668237,False,26.0,168.0,69.0,14.0,98.0,39.7,71.0,4368.0,1794.0,364.0,2548.0,1032.2,11592.0,2352.0,16464.0,6669.6,966.0,6762.0,2739.3,1372.0,555.8,3890.6,301392.0,61152.0,428064.0,173409.6,25116.0,175812.0,71221.8,35672.0,14450.8,101155.6,162288.0,1136016.0,460202.4,230496.0,93374.4,653620.8,94668.0,38350.2,268451.4,54468.4,676.0,28224.0,4761.0,196.0,9604.0,1576.09,24.447279,194.0,189.8,50.515464,51.633298,2.7,0.352645,2.70805,4.59512,3.706228,4.248495,7.224753,True,False,476.8694
1,627687,True,38.0,164.0,71.0,10.0,91.0,39.7,42.0,6232.0,2698.0,380.0,3458.0,1508.6,11644.0,1640.0,14924.0,6510.8,710.0,6461.0,2818.7,910.0,397.0,3612.7,442472.0,62320.0,567112.0,247410.4,26980.0,245518.0,107110.6,34580.0,15086.0,137282.6,116440.0,1059604.0,462266.8,149240.0,65108.0,592482.8,64610.0,28187.0,256501.7,36127.0,1444.0,26896.0,5041.0,100.0,8281.0,1576.09,26.397977,182.0,181.4,50.0,50.16538,2.7,0.251889,2.397895,4.521789,3.706228,4.276666,6.814543,True,False,240.944
2,216817,False,79.0,163.0,65.0,11.0,90.0,39.9,57.0,12877.0,5135.0,869.0,7110.0,3152.1,10595.0,1793.0,14670.0,6503.7,715.0,5850.0,2593.5,990.0,438.9,3591.0,837005.0,141647.0,1158930.0,513792.3,56485.0,462150.0,204886.5,78210.0,34673.1,283689.0,116545.0,953550.0,422740.5,161370.0,71540.7,585333.0,64350.0,28528.5,233415.0,39501.0,6241.0,26569.0,4225.0,121.0,8100.0,1592.01,24.464602,141.0,152.7,63.829787,58.939096,2.9,0.275689,2.484907,4.51086,3.71113,4.189655,6.898715,True,False,372.9143
3,579706,False,50.0,159.0,59.0,22.0,105.0,40.8,139.0,7950.0,2950.0,1100.0,5250.0,2040.0,9381.0,3498.0,16695.0,6487.2,1298.0,6195.0,2407.2,2310.0,897.6,4284.0,469050.0,174900.0,834750.0,324360.0,64900.0,309750.0,120360.0,115500.0,44880.0,214200.0,206382.0,985005.0,382744.8,367290.0,142718.4,681156.0,136290.0,52958.4,252756.0,94248.0,2500.0,25281.0,3481.0,484.0,11025.0,1664.64,23.337684,170.0,173.0,61.764706,60.693642,3.8,0.539216,3.135494,4.663439,3.732896,4.094345,7.745436,True,False,829.521
4,507091,True,25.0,180.0,77.0,4.0,79.0,39.0,5.0,4500.0,1925.0,100.0,1975.0,975.0,13860.0,720.0,14220.0,7020.0,308.0,6083.0,3003.0,316.0,156.0,3081.0,346500.0,18000.0,355500.0,175500.0,7700.0,152075.0,75075.0,7900.0,3900.0,77025.0,55440.0,1094940.0,540540.0,56880.0,28080.0,554580.0,24332.0,12012.0,237237.0,12324.0,625.0,32400.0,5929.0,16.0,6241.0,1521.0,23.765432,195.0,190.5,40.512821,41.469816,2.0,0.102564,1.609438,4.382027,3.688879,4.356709,5.758902,False,False,60.3772


In [9]:
# Persist the engineered training features as Parquet, without writing the index.
df_train.to_parquet('../data/interim/train_fe.parquet', index=False)

In [10]:
# Persist the engineered test features as Parquet, without writing the index.
df_test.to_parquet('../data/interim/test_fe.parquet', index=False)

In [11]:
# Serialize the pipeline for reuse. Assuming the wrapped callables are module-level
# functions, they are pickled by reference, so `package_ml.features` must be
# importable wherever this file is loaded.
joblib.dump(fe_pipeline, "../models/fe_pipe.joblib")

['../models/fe_pipe.joblib']