# Feature interactions

With the two functions in this module it is possible to generate interactions among numerical variables and among categorical variables, while keeping meaningful column names for the generated features.

[Data](https://www.kaggle.com/jsphyg/weather-dataset-rattle-package)<br>
[Module](https://github.com/abreukuse/ml_utilities/blob/master/feature_interactions.py) 

In [None]:
# Feature Engine is a very nice, easy to use and well documented library 
# that helps a lot in the task of building pipelines for data preprocessing.
# https://feature-engine.readthedocs.io/en/latest/quickstart.html
!pip install feature-engine

In [2]:
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer
from feature_engine.wrappers import SklearnTransformerWrapper

pd.set_option('display.max_columns', 3000)

from feature_interactions import numerical_interactions, categorical_interactions

import os
# Placeholder Kaggle API credentials: substitute your own values before running
# the download cell, and avoid saving real keys inside the notebook.
os.environ.update({
    'KAGGLE_USERNAME': 'kaggle_username',
    'KAGGLE_KEY': 'kaggle_api_key',
})

In [None]:
!kaggle datasets download -d jsphyg/weather-dataset-rattle-package
!unzip 'weather-dataset-rattle-package.zip'

In [4]:
# Load the weather CSV (expected in the working directory after the unzip step)
# and preview the first rows.
data = pd.read_csv('weatherAUS.csv')
data.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,WNW,20.0,24.0,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,WSW,4.0,22.0,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,WSW,19.0,26.0,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,E,11.0,9.0,45.0,16.0,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,NW,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No


In [5]:
# Keep only the numeric-dtype columns; they are the input of the numerical interactions demo.
numerical = data.select_dtypes('number')
numerical.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm
0,13.4,22.9,0.6,,,44.0,20.0,24.0,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8
1,7.4,25.1,0.0,,,44.0,4.0,22.0,44.0,25.0,1010.6,1007.8,,,17.2,24.3
2,12.9,25.7,0.0,,,46.0,19.0,26.0,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2
3,9.2,28.0,0.0,,,24.0,11.0,9.0,45.0,16.0,1017.6,1012.8,,,18.1,26.5
4,17.5,32.3,1.0,,,41.0,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7


In [6]:
# Keep the object-dtype (string) columns, leaving out Date and Location as well as
# RainTomorrow, which is the target (see the train/validation preparation further down).
categorical = data.select_dtypes('object').drop(columns=['Date','Location','RainTomorrow'])
categorical.head()

Unnamed: 0,WindGustDir,WindDir9am,WindDir3pm,RainToday
0,W,W,WNW,No
1,WNW,NNW,WSW,No
2,WSW,W,WSW,No
3,NE,SE,E,No
4,W,ENE,NW,No


In [7]:
# Impute missing values
numerical = SklearnTransformerWrapper(transformer=SimpleImputer(strategy='median')).fit_transform(numerical)
categorical = SklearnTransformerWrapper(transformer=SimpleImputer(strategy='most_frequent')).fit_transform(categorical)

In [8]:
numerical.isnull().sum().sum()

0

In [9]:
categorical.isnull().sum().sum()

0

In [10]:
# Expand the numeric frame with degree-2 terms; with interaction_only=False the result
# contains squared columns (e.g. `MinTemp^2`) in addition to the pairwise products.
# NOTE: this rebinds `numerical`, so re-running the cell alone would feed the
# already-expanded frame back into the function.
numerical = numerical_interactions(X=numerical, variables='all', degree=2, interaction_only=False)

In [11]:
numerical.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,MinTemp^2,MinTemp MaxTemp,MinTemp Rainfall,MinTemp Evaporation,MinTemp Sunshine,MinTemp WindGustSpeed,MinTemp WindSpeed9am,MinTemp WindSpeed3pm,MinTemp Humidity9am,MinTemp Humidity3pm,MinTemp Pressure9am,MinTemp Pressure3pm,MinTemp Cloud9am,MinTemp Cloud3pm,MinTemp Temp9am,MinTemp Temp3pm,MaxTemp^2,MaxTemp Rainfall,MaxTemp Evaporation,MaxTemp Sunshine,MaxTemp WindGustSpeed,MaxTemp WindSpeed9am,MaxTemp WindSpeed3pm,MaxTemp Humidity9am,MaxTemp Humidity3pm,MaxTemp Pressure9am,MaxTemp Pressure3pm,MaxTemp Cloud9am,MaxTemp Cloud3pm,MaxTemp Temp9am,MaxTemp Temp3pm,Rainfall^2,Rainfall Evaporation,Rainfall Sunshine,Rainfall WindGustSpeed,Rainfall WindSpeed9am,Rainfall WindSpeed3pm,Rainfall Humidity9am,Rainfall Humidity3pm,Rainfall Pressure9am,Rainfall Pressure3pm,Rainfall Cloud9am,Rainfall Cloud3pm,Rainfall Temp9am,Rainfall Temp3pm,Evaporation^2,Evaporation Sunshine,Evaporation WindGustSpeed,Evaporation WindSpeed9am,Evaporation WindSpeed3pm,Evaporation Humidity9am,Evaporation Humidity3pm,Evaporation Pressure9am,Evaporation Pressure3pm,Evaporation Cloud9am,Evaporation Cloud3pm,Evaporation Temp9am,Evaporation Temp3pm,Sunshine^2,Sunshine WindGustSpeed,Sunshine WindSpeed9am,Sunshine WindSpeed3pm,Sunshine Humidity9am,Sunshine Humidity3pm,Sunshine Pressure9am,Sunshine Pressure3pm,Sunshine Cloud9am,Sunshine Cloud3pm,Sunshine Temp9am,Sunshine Temp3pm,WindGustSpeed^2,WindGustSpeed WindSpeed9am,WindGustSpeed WindSpeed3pm,WindGustSpeed Humidity9am,WindGustSpeed Humidity3pm,WindGustSpeed Pressure9am,WindGustSpeed Pressure3pm,WindGustSpeed Cloud9am,WindGustSpeed Cloud3pm,WindGustSpeed Temp9am,WindGustSpeed Temp3pm,WindSpeed9am^2,WindSpeed9am WindSpeed3pm,WindSpeed9am Humidity9am,WindSpeed9am Humidity3pm,WindSpeed9am Pressure9am,WindSpeed9am Pressure3pm,WindSpeed9am Cloud9am,WindSpeed9am Cloud3pm,WindSpeed9am 
Temp9am,WindSpeed9am Temp3pm,WindSpeed3pm^2,WindSpeed3pm Humidity9am,WindSpeed3pm Humidity3pm,WindSpeed3pm Pressure9am,WindSpeed3pm Pressure3pm,WindSpeed3pm Cloud9am,WindSpeed3pm Cloud3pm,WindSpeed3pm Temp9am,WindSpeed3pm Temp3pm,Humidity9am^2,Humidity9am Humidity3pm,Humidity9am Pressure9am,Humidity9am Pressure3pm,Humidity9am Cloud9am,Humidity9am Cloud3pm,Humidity9am Temp9am,Humidity9am Temp3pm,Humidity3pm^2,Humidity3pm Pressure9am,Humidity3pm Pressure3pm,Humidity3pm Cloud9am,Humidity3pm Cloud3pm,Humidity3pm Temp9am,Humidity3pm Temp3pm,Pressure9am^2,Pressure9am Pressure3pm,Pressure9am Cloud9am,Pressure9am Cloud3pm,Pressure9am Temp9am,Pressure9am Temp3pm,Pressure3pm^2,Pressure3pm Cloud9am,Pressure3pm Cloud3pm,Pressure3pm Temp9am,Pressure3pm Temp3pm,Cloud9am^2,Cloud9am Cloud3pm,Cloud9am Temp9am,Cloud9am Temp3pm,Cloud3pm^2,Cloud3pm Temp9am,Cloud3pm Temp3pm,Temp9am^2,Temp9am Temp3pm,Temp3pm^2
0,13.4,22.9,0.6,4.8,8.4,44.0,20.0,24.0,71.0,22.0,1007.7,1007.1,8.0,5.0,16.9,21.8,179.56,306.86,8.04,64.32,112.56,589.6,268.0,321.6,951.4,294.8,13503.18,13495.14,107.2,67.0,226.46,292.12,524.41,13.74,109.92,192.36,1007.6,458.0,549.6,1625.9,503.8,23076.33,23062.59,183.2,114.5,387.01,499.22,0.36,2.88,5.04,26.4,12.0,14.4,42.6,13.2,604.62,604.26,4.8,3.0,10.14,13.08,23.04,40.32,211.2,96.0,115.2,340.8,105.6,4836.96,4834.08,38.4,24.0,81.12,104.64,70.56,369.6,168.0,201.6,596.4,184.8,8464.68,8459.64,67.2,42.0,141.96,183.12,1936.0,880.0,1056.0,3124.0,968.0,44338.8,44312.4,352.0,220.0,743.6,959.2,400.0,480.0,1420.0,440.0,20154.0,20142.0,160.0,100.0,338.0,436.0,576.0,1704.0,528.0,24184.8,24170.4,192.0,120.0,405.6,523.2,5041.0,1562.0,71546.7,71504.1,568.0,355.0,1199.9,1547.8,484.0,22169.4,22156.2,176.0,110.0,371.8,479.6,1015459.29,1014854.67,8061.6,5038.5,17030.13,21967.86,1014250.41,8056.8,5035.5,17019.99,21954.78,64.0,40.0,135.2,174.4,25.0,84.5,109.0,285.61,368.42,475.24
1,7.4,25.1,0.0,4.8,8.4,44.0,4.0,22.0,44.0,25.0,1010.6,1007.8,5.0,5.0,17.2,24.3,54.76,185.74,0.0,35.52,62.16,325.6,29.6,162.8,325.6,185.0,7478.44,7457.72,37.0,37.0,127.28,179.82,630.01,0.0,120.48,210.84,1104.4,100.4,552.2,1104.4,627.5,25366.06,25295.78,125.5,125.5,431.72,609.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.04,40.32,211.2,19.2,105.6,211.2,120.0,4850.88,4837.44,24.0,24.0,82.56,116.64,70.56,369.6,33.6,184.8,369.6,210.0,8489.04,8465.52,42.0,42.0,144.48,204.12,1936.0,176.0,968.0,1936.0,1100.0,44466.4,44343.2,220.0,220.0,756.8,1069.2,16.0,88.0,176.0,100.0,4042.4,4031.2,20.0,20.0,68.8,97.2,484.0,968.0,550.0,22233.2,22171.6,110.0,110.0,378.4,534.6,1936.0,1100.0,44466.4,44343.2,220.0,220.0,756.8,1069.2,625.0,25265.0,25195.0,125.0,125.0,430.0,607.5,1021312.36,1018482.68,5053.0,5053.0,17382.32,24557.58,1015660.84,5039.0,5039.0,17334.16,24489.54,25.0,25.0,86.0,121.5,25.0,86.0,121.5,295.84,417.96,590.49
2,12.9,25.7,0.0,4.8,8.4,46.0,19.0,26.0,38.0,30.0,1007.6,1008.7,5.0,2.0,21.0,23.2,166.41,331.53,0.0,61.92,108.36,593.4,245.1,335.4,490.2,387.0,12998.04,13012.23,64.5,25.8,270.9,299.28,660.49,0.0,123.36,215.88,1182.2,488.3,668.2,976.6,771.0,25895.32,25923.59,128.5,51.4,539.7,596.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.04,40.32,220.8,91.2,124.8,182.4,144.0,4836.48,4841.76,24.0,9.6,100.8,111.36,70.56,386.4,159.6,218.4,319.2,252.0,8463.84,8473.08,42.0,16.8,176.4,194.88,2116.0,874.0,1196.0,1748.0,1380.0,46349.6,46400.2,230.0,92.0,966.0,1067.2,361.0,494.0,722.0,570.0,19144.4,19165.3,95.0,38.0,399.0,440.8,676.0,988.0,780.0,26197.6,26226.2,130.0,52.0,546.0,603.2,1444.0,1140.0,38288.8,38330.6,190.0,76.0,798.0,881.6,900.0,30228.0,30261.0,150.0,60.0,630.0,696.0,1015257.76,1016366.12,5038.0,2015.2,21159.6,23376.32,1017475.69,5043.5,2017.4,21182.7,23401.84,25.0,10.0,105.0,116.0,4.0,42.0,46.4,441.0,487.2,538.24
3,9.2,28.0,0.0,4.8,8.4,24.0,11.0,9.0,45.0,16.0,1017.6,1012.8,5.0,5.0,18.1,26.5,84.64,257.6,0.0,44.16,77.28,220.8,101.2,82.8,414.0,147.2,9361.92,9317.76,46.0,46.0,166.52,243.8,784.0,0.0,134.4,235.2,672.0,308.0,252.0,1260.0,448.0,28492.8,28358.4,140.0,140.0,506.8,742.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.04,40.32,115.2,52.8,43.2,216.0,76.8,4884.48,4861.44,24.0,24.0,86.88,127.2,70.56,201.6,92.4,75.6,378.0,134.4,8547.84,8507.52,42.0,42.0,152.04,222.6,576.0,264.0,216.0,1080.0,384.0,24422.4,24307.2,120.0,120.0,434.4,636.0,121.0,99.0,495.0,176.0,11193.6,11140.8,55.0,55.0,199.1,291.5,81.0,405.0,144.0,9158.4,9115.2,45.0,45.0,162.9,238.5,2025.0,720.0,45792.0,45576.0,225.0,225.0,814.5,1192.5,256.0,16281.6,16204.8,80.0,80.0,289.6,424.0,1035509.76,1030625.28,5088.0,5088.0,18418.56,26966.4,1025763.84,5064.0,5064.0,18331.68,26839.2,25.0,25.0,90.5,132.5,25.0,90.5,132.5,327.61,479.65,702.25
4,17.5,32.3,1.0,4.8,8.4,41.0,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,306.25,565.25,17.5,84.0,147.0,717.5,122.5,350.0,1435.0,577.5,17689.0,17605.0,122.5,140.0,311.5,519.75,1043.29,32.3,155.04,271.32,1324.3,226.1,646.0,2648.6,1065.9,32648.84,32493.8,226.1,258.4,574.94,959.31,1.0,4.8,8.4,41.0,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,23.04,40.32,196.8,33.6,96.0,393.6,158.4,4851.84,4828.8,33.6,38.4,85.44,142.56,70.56,344.4,58.8,168.0,688.8,277.2,8490.72,8450.4,58.8,67.2,149.52,249.48,1681.0,287.0,820.0,3362.0,1353.0,41442.8,41246.0,287.0,328.0,729.8,1217.7,49.0,140.0,574.0,231.0,7075.6,7042.0,49.0,56.0,124.6,207.9,400.0,1640.0,660.0,20216.0,20120.0,140.0,160.0,356.0,594.0,6724.0,2706.0,82885.6,82492.0,574.0,656.0,1459.6,2435.4,1089.0,33356.4,33198.0,231.0,264.0,587.4,980.1,1021716.64,1016864.8,7075.6,8086.4,17992.24,30020.76,1012036.0,7042.0,8048.0,17906.8,29878.2,49.0,56.0,124.6,207.9,64.0,142.4,237.6,316.84,528.66,882.09


In [12]:
# Combine the four categorical columns into 2-, 3- and 4-way combinations; the new
# columns join both names and values with '-' (e.g. `WindGustDir-WindDir9am` -> `W-W`).
# NOTE: this rebinds `categorical`, so re-running the cell alone would feed the
# already-expanded frame back into the function.
categorical = categorical_interactions(X=categorical, variables='all', order=4)

In [13]:
categorical.head()

Unnamed: 0,WindGustDir,WindDir9am,WindDir3pm,RainToday,WindGustDir-WindDir9am,WindGustDir-WindDir3pm,WindGustDir-RainToday,WindDir9am-WindDir3pm,WindDir9am-RainToday,WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm,WindGustDir-WindDir9am-RainToday,WindGustDir-WindDir3pm-RainToday,WindDir9am-WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm-RainToday
0,W,W,WNW,No,W-W,W-WNW,W-No,W-WNW,W-No,WNW-No,W-W-WNW,W-W-No,W-WNW-No,W-WNW-No,W-W-WNW-No
1,WNW,NNW,WSW,No,WNW-NNW,WNW-WSW,WNW-No,NNW-WSW,NNW-No,WSW-No,WNW-NNW-WSW,WNW-NNW-No,WNW-WSW-No,NNW-WSW-No,WNW-NNW-WSW-No
2,WSW,W,WSW,No,WSW-W,WSW-WSW,WSW-No,W-WSW,W-No,WSW-No,WSW-W-WSW,WSW-W-No,WSW-WSW-No,W-WSW-No,WSW-W-WSW-No
3,NE,SE,E,No,NE-SE,NE-E,NE-No,SE-E,SE-No,E-No,NE-SE-E,NE-SE-No,NE-E-No,SE-E-No,NE-SE-E-No
4,W,ENE,NW,No,W-ENE,W-NW,W-No,ENE-NW,ENE-No,NW-No,W-ENE-NW,W-ENE-No,W-NW-No,ENE-NW-No,W-ENE-NW-No


## Pipeline

In [14]:
# Numerical columns: imputed with the median and used for the numerical interactions.
NUMERICAL = [
    'MinTemp',
    'MaxTemp',
    'Rainfall',
    'Evaporation',
    'Sunshine',
    'WindGustSpeed',
    'WindSpeed9am',
    'WindSpeed3pm',
    'Humidity9am',
    'Humidity3pm',
    'Pressure9am',
    'Pressure3pm',
    'Cloud9am',
    'Cloud3pm',
    'Temp9am',
    'Temp3pm',
]

# Categorical columns: imputed with the most frequent value and used for the
# categorical interactions.
CATEGORICAL = [
    'WindGustDir',
    'WindDir9am',
    'WindDir3pm',
    'RainToday',
]

# Every column kept by the pipeline: numerical first, then categorical.
# Derived from the two lists above (instead of a hand-copied third list) so the
# three constants cannot drift apart. `+` builds a new list, so neither source
# list is aliased.
VARIABLES = NUMERICAL + CATEGORICAL

def select(X, variables):
    """Return an independent copy of ``X`` restricted to ``variables``.

    Used as the first pipeline step so that the later steps only see the
    columns chosen for this demonstration.
    """
    subset = X[variables]
    return subset.copy()

# Preprocessing pipeline. Step order matters: columns are selected first, missing
# values are imputed next, and only then are the interaction features generated.
pipeline = make_pipeline(

    # Selection of variables: keep only the columns listed in VARIABLES
    FunctionTransformer(select, 
                        kw_args={'variables': VARIABLES}
                        ),

    # Missing data imputation for numerical variables (median)
    SklearnTransformerWrapper(variables=NUMERICAL,
                              transformer=SimpleImputer(strategy='median')
                              ),

    # Missing data imputation for categorical variables (most frequent value)
    SklearnTransformerWrapper(variables=CATEGORICAL,
                              transformer=SimpleImputer(strategy='most_frequent')
                              ),

    # Creation of numerical interactions features
    # (interaction_only=True: unlike the standalone demo above, no squared terms are requested)
    FunctionTransformer(numerical_interactions, 
                        kw_args={'variables': NUMERICAL,
                                 'degree': 2,
                                 'interaction_only': True
                                 }
                        ),

    # Creation of categorical interactions features (combinations of up to 4 columns)
    FunctionTransformer(categorical_interactions,
                        kw_args={'variables': CATEGORICAL,
                                 'order': 4
                                 }
                        )                         
)

In [15]:
# There are missing data in the target
data = data.loc[~data['RainTomorrow'].isnull(), :].copy()

# Date/time conversion
data['Date'] = pd.to_datetime(data['Date'])

In [16]:
# Simple split
train = data.query('Date < "2015-01-01"').copy()
validation = data.query('Date >= "2015-01-01"').copy()

In [17]:
# Fit the pipeline on the training split only and transform that split in the same call.
X_train = pipeline.fit_transform(train)

In [18]:
X_train.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,WindGustDir,WindDir9am,WindDir3pm,RainToday,MinTemp MaxTemp,MinTemp Rainfall,MinTemp Evaporation,MinTemp Sunshine,MinTemp WindGustSpeed,MinTemp WindSpeed9am,MinTemp WindSpeed3pm,MinTemp Humidity9am,MinTemp Humidity3pm,MinTemp Pressure9am,MinTemp Pressure3pm,MinTemp Cloud9am,MinTemp Cloud3pm,MinTemp Temp9am,MinTemp Temp3pm,MaxTemp Rainfall,MaxTemp Evaporation,MaxTemp Sunshine,MaxTemp WindGustSpeed,MaxTemp WindSpeed9am,MaxTemp WindSpeed3pm,MaxTemp Humidity9am,MaxTemp Humidity3pm,MaxTemp Pressure9am,MaxTemp Pressure3pm,MaxTemp Cloud9am,MaxTemp Cloud3pm,MaxTemp Temp9am,MaxTemp Temp3pm,Rainfall Evaporation,Rainfall Sunshine,Rainfall WindGustSpeed,Rainfall WindSpeed9am,Rainfall WindSpeed3pm,Rainfall Humidity9am,Rainfall Humidity3pm,Rainfall Pressure9am,Rainfall Pressure3pm,Rainfall Cloud9am,Rainfall Cloud3pm,Rainfall Temp9am,Rainfall Temp3pm,Evaporation Sunshine,Evaporation WindGustSpeed,Evaporation WindSpeed9am,Evaporation WindSpeed3pm,Evaporation Humidity9am,Evaporation Humidity3pm,Evaporation Pressure9am,Evaporation Pressure3pm,Evaporation Cloud9am,Evaporation Cloud3pm,Evaporation Temp9am,Evaporation Temp3pm,Sunshine WindGustSpeed,Sunshine WindSpeed9am,Sunshine WindSpeed3pm,Sunshine Humidity9am,Sunshine Humidity3pm,Sunshine Pressure9am,Sunshine Pressure3pm,Sunshine Cloud9am,Sunshine Cloud3pm,Sunshine Temp9am,Sunshine Temp3pm,WindGustSpeed WindSpeed9am,WindGustSpeed WindSpeed3pm,WindGustSpeed Humidity9am,WindGustSpeed Humidity3pm,WindGustSpeed Pressure9am,WindGustSpeed Pressure3pm,WindGustSpeed Cloud9am,WindGustSpeed Cloud3pm,WindGustSpeed Temp9am,WindGustSpeed Temp3pm,WindSpeed9am WindSpeed3pm,WindSpeed9am Humidity9am,WindSpeed9am Humidity3pm,WindSpeed9am Pressure9am,WindSpeed9am Pressure3pm,WindSpeed9am Cloud9am,WindSpeed9am Cloud3pm,WindSpeed9am Temp9am,WindSpeed9am Temp3pm,WindSpeed3pm 
Humidity9am,WindSpeed3pm Humidity3pm,WindSpeed3pm Pressure9am,WindSpeed3pm Pressure3pm,WindSpeed3pm Cloud9am,WindSpeed3pm Cloud3pm,WindSpeed3pm Temp9am,WindSpeed3pm Temp3pm,Humidity9am Humidity3pm,Humidity9am Pressure9am,Humidity9am Pressure3pm,Humidity9am Cloud9am,Humidity9am Cloud3pm,Humidity9am Temp9am,Humidity9am Temp3pm,Humidity3pm Pressure9am,Humidity3pm Pressure3pm,Humidity3pm Cloud9am,Humidity3pm Cloud3pm,Humidity3pm Temp9am,Humidity3pm Temp3pm,Pressure9am Pressure3pm,Pressure9am Cloud9am,Pressure9am Cloud3pm,Pressure9am Temp9am,Pressure9am Temp3pm,Pressure3pm Cloud9am,Pressure3pm Cloud3pm,Pressure3pm Temp9am,Pressure3pm Temp3pm,Cloud9am Cloud3pm,Cloud9am Temp9am,Cloud9am Temp3pm,Cloud3pm Temp9am,Cloud3pm Temp3pm,Temp9am Temp3pm,WindGustDir-WindDir9am,WindGustDir-WindDir3pm,WindGustDir-RainToday,WindDir9am-WindDir3pm,WindDir9am-RainToday,WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm,WindGustDir-WindDir9am-RainToday,WindGustDir-WindDir3pm-RainToday,WindDir9am-WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm-RainToday
0,13.4,22.9,0.6,4.6,8.4,44.0,20.0,24.0,71.0,22.0,1007.7,1007.1,8.0,5.0,16.9,21.8,W,W,WNW,No,306.86,8.04,61.64,112.56,589.6,268.0,321.6,951.4,294.8,13503.18,13495.14,107.2,67.0,226.46,292.12,13.74,105.34,192.36,1007.6,458.0,549.6,1625.9,503.8,23076.33,23062.59,183.2,114.5,387.01,499.22,2.76,5.04,26.4,12.0,14.4,42.6,13.2,604.62,604.26,4.8,3.0,10.14,13.08,38.64,202.4,92.0,110.4,326.6,101.2,4635.42,4632.66,36.8,23.0,77.74,100.28,369.6,168.0,201.6,596.4,184.8,8464.68,8459.64,67.2,42.0,141.96,183.12,880.0,1056.0,3124.0,968.0,44338.8,44312.4,352.0,220.0,743.6,959.2,480.0,1420.0,440.0,20154.0,20142.0,160.0,100.0,338.0,436.0,1704.0,528.0,24184.8,24170.4,192.0,120.0,405.6,523.2,1562.0,71546.7,71504.1,568.0,355.0,1199.9,1547.8,22169.4,22156.2,176.0,110.0,371.8,479.6,1014854.67,8061.6,5038.5,17030.13,21967.86,8056.8,5035.5,17019.99,21954.78,40.0,135.2,174.4,84.5,109.0,368.42,W-W,W-WNW,W-No,W-WNW,W-No,WNW-No,W-W-WNW,W-W-No,W-WNW-No,W-WNW-No,W-W-WNW-No
1,7.4,25.1,0.0,4.6,8.4,44.0,4.0,22.0,44.0,25.0,1010.6,1007.8,5.0,5.0,17.2,24.3,WNW,NNW,WSW,No,185.74,0.0,34.04,62.16,325.6,29.6,162.8,325.6,185.0,7478.44,7457.72,37.0,37.0,127.28,179.82,0.0,115.46,210.84,1104.4,100.4,552.2,1104.4,627.5,25366.06,25295.78,125.5,125.5,431.72,609.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,202.4,18.4,101.2,202.4,115.0,4648.76,4635.88,23.0,23.0,79.12,111.78,369.6,33.6,184.8,369.6,210.0,8489.04,8465.52,42.0,42.0,144.48,204.12,176.0,968.0,1936.0,1100.0,44466.4,44343.2,220.0,220.0,756.8,1069.2,88.0,176.0,100.0,4042.4,4031.2,20.0,20.0,68.8,97.2,968.0,550.0,22233.2,22171.6,110.0,110.0,378.4,534.6,1100.0,44466.4,44343.2,220.0,220.0,756.8,1069.2,25265.0,25195.0,125.0,125.0,430.0,607.5,1018482.68,5053.0,5053.0,17382.32,24557.58,5039.0,5039.0,17334.16,24489.54,25.0,86.0,121.5,86.0,121.5,417.96,WNW-NNW,WNW-WSW,WNW-No,NNW-WSW,NNW-No,WSW-No,WNW-NNW-WSW,WNW-NNW-No,WNW-WSW-No,NNW-WSW-No,WNW-NNW-WSW-No
2,12.9,25.7,0.0,4.6,8.4,46.0,19.0,26.0,38.0,30.0,1007.6,1008.7,5.0,2.0,21.0,23.2,WSW,W,WSW,No,331.53,0.0,59.34,108.36,593.4,245.1,335.4,490.2,387.0,12998.04,13012.23,64.5,25.8,270.9,299.28,0.0,118.22,215.88,1182.2,488.3,668.2,976.6,771.0,25895.32,25923.59,128.5,51.4,539.7,596.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,211.6,87.4,119.6,174.8,138.0,4634.96,4640.02,23.0,9.2,96.6,106.72,386.4,159.6,218.4,319.2,252.0,8463.84,8473.08,42.0,16.8,176.4,194.88,874.0,1196.0,1748.0,1380.0,46349.6,46400.2,230.0,92.0,966.0,1067.2,494.0,722.0,570.0,19144.4,19165.3,95.0,38.0,399.0,440.8,988.0,780.0,26197.6,26226.2,130.0,52.0,546.0,603.2,1140.0,38288.8,38330.6,190.0,76.0,798.0,881.6,30228.0,30261.0,150.0,60.0,630.0,696.0,1016366.12,5038.0,2015.2,21159.6,23376.32,5043.5,2017.4,21182.7,23401.84,10.0,105.0,116.0,42.0,46.4,487.2,WSW-W,WSW-WSW,WSW-No,W-WSW,W-No,WSW-No,WSW-W-WSW,WSW-W-No,WSW-WSW-No,W-WSW-No,WSW-W-WSW-No
3,9.2,28.0,0.0,4.6,8.4,24.0,11.0,9.0,45.0,16.0,1017.6,1012.8,5.0,5.0,18.1,26.5,NE,SE,E,No,257.6,0.0,42.32,77.28,220.8,101.2,82.8,414.0,147.2,9361.92,9317.76,46.0,46.0,166.52,243.8,0.0,128.8,235.2,672.0,308.0,252.0,1260.0,448.0,28492.8,28358.4,140.0,140.0,506.8,742.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,110.4,50.6,41.4,207.0,73.6,4680.96,4658.88,23.0,23.0,83.26,121.9,201.6,92.4,75.6,378.0,134.4,8547.84,8507.52,42.0,42.0,152.04,222.6,264.0,216.0,1080.0,384.0,24422.4,24307.2,120.0,120.0,434.4,636.0,99.0,495.0,176.0,11193.6,11140.8,55.0,55.0,199.1,291.5,405.0,144.0,9158.4,9115.2,45.0,45.0,162.9,238.5,720.0,45792.0,45576.0,225.0,225.0,814.5,1192.5,16281.6,16204.8,80.0,80.0,289.6,424.0,1030625.28,5088.0,5088.0,18418.56,26966.4,5064.0,5064.0,18331.68,26839.2,25.0,90.5,132.5,90.5,132.5,479.65,NE-SE,NE-E,NE-No,SE-E,SE-No,E-No,NE-SE-E,NE-SE-No,NE-E-No,SE-E-No,NE-SE-E-No
4,17.5,32.3,1.0,4.6,8.4,41.0,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,W,ENE,NW,No,565.25,17.5,80.5,147.0,717.5,122.5,350.0,1435.0,577.5,17689.0,17605.0,122.5,140.0,311.5,519.75,32.3,148.58,271.32,1324.3,226.1,646.0,2648.6,1065.9,32648.84,32493.8,226.1,258.4,574.94,959.31,4.6,8.4,41.0,7.0,20.0,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,38.64,188.6,32.2,92.0,377.2,151.8,4649.68,4627.6,32.2,36.8,81.88,136.62,344.4,58.8,168.0,688.8,277.2,8490.72,8450.4,58.8,67.2,149.52,249.48,287.0,820.0,3362.0,1353.0,41442.8,41246.0,287.0,328.0,729.8,1217.7,140.0,574.0,231.0,7075.6,7042.0,49.0,56.0,124.6,207.9,1640.0,660.0,20216.0,20120.0,140.0,160.0,356.0,594.0,2706.0,82885.6,82492.0,574.0,656.0,1459.6,2435.4,33356.4,33198.0,231.0,264.0,587.4,980.1,1016864.8,7075.6,8086.4,17992.24,30020.76,7042.0,8048.0,17906.8,29878.2,56.0,124.6,207.9,142.4,237.6,528.66,W-ENE,W-NW,W-No,ENE-NW,ENE-No,NW-No,W-ENE-NW,W-ENE-No,W-NW-No,ENE-NW-No,W-ENE-NW-No


In [19]:
# Apply the already-fitted pipeline to the validation split (transform only, no refit).
X_validation = pipeline.transform(validation)

In [20]:
X_validation.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,WindGustDir,WindDir9am,WindDir3pm,RainToday,MinTemp MaxTemp,MinTemp Rainfall,MinTemp Evaporation,MinTemp Sunshine,MinTemp WindGustSpeed,MinTemp WindSpeed9am,MinTemp WindSpeed3pm,MinTemp Humidity9am,MinTemp Humidity3pm,MinTemp Pressure9am,MinTemp Pressure3pm,MinTemp Cloud9am,MinTemp Cloud3pm,MinTemp Temp9am,MinTemp Temp3pm,MaxTemp Rainfall,MaxTemp Evaporation,MaxTemp Sunshine,MaxTemp WindGustSpeed,MaxTemp WindSpeed9am,MaxTemp WindSpeed3pm,MaxTemp Humidity9am,MaxTemp Humidity3pm,MaxTemp Pressure9am,MaxTemp Pressure3pm,MaxTemp Cloud9am,MaxTemp Cloud3pm,MaxTemp Temp9am,MaxTemp Temp3pm,Rainfall Evaporation,Rainfall Sunshine,Rainfall WindGustSpeed,Rainfall WindSpeed9am,Rainfall WindSpeed3pm,Rainfall Humidity9am,Rainfall Humidity3pm,Rainfall Pressure9am,Rainfall Pressure3pm,Rainfall Cloud9am,Rainfall Cloud3pm,Rainfall Temp9am,Rainfall Temp3pm,Evaporation Sunshine,Evaporation WindGustSpeed,Evaporation WindSpeed9am,Evaporation WindSpeed3pm,Evaporation Humidity9am,Evaporation Humidity3pm,Evaporation Pressure9am,Evaporation Pressure3pm,Evaporation Cloud9am,Evaporation Cloud3pm,Evaporation Temp9am,Evaporation Temp3pm,Sunshine WindGustSpeed,Sunshine WindSpeed9am,Sunshine WindSpeed3pm,Sunshine Humidity9am,Sunshine Humidity3pm,Sunshine Pressure9am,Sunshine Pressure3pm,Sunshine Cloud9am,Sunshine Cloud3pm,Sunshine Temp9am,Sunshine Temp3pm,WindGustSpeed WindSpeed9am,WindGustSpeed WindSpeed3pm,WindGustSpeed Humidity9am,WindGustSpeed Humidity3pm,WindGustSpeed Pressure9am,WindGustSpeed Pressure3pm,WindGustSpeed Cloud9am,WindGustSpeed Cloud3pm,WindGustSpeed Temp9am,WindGustSpeed Temp3pm,WindSpeed9am WindSpeed3pm,WindSpeed9am Humidity9am,WindSpeed9am Humidity3pm,WindSpeed9am Pressure9am,WindSpeed9am Pressure3pm,WindSpeed9am Cloud9am,WindSpeed9am Cloud3pm,WindSpeed9am Temp9am,WindSpeed9am Temp3pm,WindSpeed3pm 
Humidity9am,WindSpeed3pm Humidity3pm,WindSpeed3pm Pressure9am,WindSpeed3pm Pressure3pm,WindSpeed3pm Cloud9am,WindSpeed3pm Cloud3pm,WindSpeed3pm Temp9am,WindSpeed3pm Temp3pm,Humidity9am Humidity3pm,Humidity9am Pressure9am,Humidity9am Pressure3pm,Humidity9am Cloud9am,Humidity9am Cloud3pm,Humidity9am Temp9am,Humidity9am Temp3pm,Humidity3pm Pressure9am,Humidity3pm Pressure3pm,Humidity3pm Cloud9am,Humidity3pm Cloud3pm,Humidity3pm Temp9am,Humidity3pm Temp3pm,Pressure9am Pressure3pm,Pressure9am Cloud9am,Pressure9am Cloud3pm,Pressure9am Temp9am,Pressure9am Temp3pm,Pressure3pm Cloud9am,Pressure3pm Cloud3pm,Pressure3pm Temp9am,Pressure3pm Temp3pm,Cloud9am Cloud3pm,Cloud9am Temp9am,Cloud9am Temp3pm,Cloud3pm Temp9am,Cloud3pm Temp3pm,Temp9am Temp3pm,WindGustDir-WindDir9am,WindGustDir-WindDir3pm,WindGustDir-RainToday,WindDir9am-WindDir3pm,WindDir9am-RainToday,WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm,WindGustDir-WindDir9am-RainToday,WindGustDir-WindDir3pm-RainToday,WindDir9am-WindDir3pm-RainToday,WindGustDir-WindDir9am-WindDir3pm-RainToday
2133,11.4,33.5,0.0,4.6,8.4,30.0,7.0,11.0,45.0,14.0,1013.5,1011.0,5.0,5.0,21.0,32.7,WSW,ESE,W,No,381.9,0.0,52.44,95.76,342.0,79.8,125.4,513.0,159.6,11553.9,11525.4,57.0,57.0,239.4,372.78,0.0,154.1,281.4,1005.0,234.5,368.5,1507.5,469.0,33952.25,33868.5,167.5,167.5,703.5,1095.45,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,138.0,32.2,50.6,207.0,64.4,4662.1,4650.6,23.0,23.0,96.6,150.42,252.0,58.8,92.4,378.0,117.6,8513.4,8492.4,42.0,42.0,176.4,274.68,210.0,330.0,1350.0,420.0,30405.0,30330.0,150.0,150.0,630.0,981.0,77.0,315.0,98.0,7094.5,7077.0,35.0,35.0,147.0,228.9,495.0,154.0,11148.5,11121.0,55.0,55.0,231.0,359.7,630.0,45607.5,45495.0,225.0,225.0,945.0,1471.5,14189.0,14154.0,70.0,70.0,294.0,457.8,1024648.5,5067.5,5067.5,21283.5,33141.45,5055.0,5055.0,21231.0,33059.7,25.0,105.0,163.5,105.0,163.5,686.7,WSW-ESE,WSW-W,WSW-No,ESE-W,ESE-No,W-No,WSW-ESE-W,WSW-ESE-No,WSW-W-No,ESE-W-No,WSW-ESE-W-No
2134,15.5,39.6,0.0,4.6,8.4,56.0,9.0,9.0,45.0,12.0,1016.0,1012.4,5.0,5.0,25.6,38.2,NE,ESE,ESE,No,613.8,0.0,71.3,130.2,868.0,139.5,139.5,697.5,186.0,15748.0,15692.2,77.5,77.5,396.8,592.1,0.0,182.16,332.64,2217.6,356.4,356.4,1782.0,475.2,40233.6,40091.04,198.0,198.0,1013.76,1512.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,257.6,41.4,41.4,207.0,55.2,4673.6,4657.04,23.0,23.0,117.76,175.72,470.4,75.6,75.6,378.0,100.8,8534.4,8504.16,42.0,42.0,215.04,320.88,504.0,504.0,2520.0,672.0,56896.0,56694.4,280.0,280.0,1433.6,2139.2,81.0,405.0,108.0,9144.0,9111.6,45.0,45.0,230.4,343.8,405.0,108.0,9144.0,9111.6,45.0,45.0,230.4,343.8,540.0,45720.0,45558.0,225.0,225.0,1152.0,1719.0,12192.0,12148.8,60.0,60.0,307.2,458.4,1028598.4,5080.0,5080.0,26009.6,38811.2,5062.0,5062.0,25917.44,38673.68,25.0,128.0,191.0,128.0,191.0,977.92,NE-ESE,NE-ESE,NE-No,ESE-ESE,ESE-No,ESE-No,NE-ESE-ESE,NE-ESE-No,NE-ESE-No,ESE-ESE-No,NE-ESE-ESE-No
2135,17.1,38.3,0.0,4.6,8.4,48.0,20.0,20.0,35.0,19.0,1017.9,1012.3,5.0,5.0,29.2,37.0,NNE,NE,N,No,654.93,0.0,78.66,143.64,820.8,342.0,342.0,598.5,324.9,17406.09,17310.33,85.5,85.5,499.32,632.7,0.0,176.18,321.72,1838.4,766.0,766.0,1340.5,727.7,38985.57,38771.09,191.5,191.5,1118.36,1417.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,220.8,92.0,92.0,161.0,87.4,4682.34,4656.58,23.0,23.0,134.32,170.2,403.2,168.0,168.0,294.0,159.6,8550.36,8503.32,42.0,42.0,245.28,310.8,960.0,960.0,1680.0,912.0,48859.2,48590.4,240.0,240.0,1401.6,1776.0,400.0,700.0,380.0,20358.0,20246.0,100.0,100.0,584.0,740.0,700.0,380.0,20358.0,20246.0,100.0,100.0,584.0,740.0,665.0,35626.5,35430.5,175.0,175.0,1022.0,1295.0,19340.1,19233.7,95.0,95.0,554.8,703.0,1030420.17,5089.5,5089.5,29722.68,37662.3,5061.5,5061.5,29559.16,37455.1,25.0,146.0,185.0,146.0,185.0,1080.4,NNE-NE,NNE-N,NNE-No,NE-N,NE-No,N-No,NNE-NE-N,NNE-NE-No,NNE-N-No,NE-N-No,NNE-NE-N-No
2136,26.0,33.1,0.0,4.6,8.4,41.0,7.0,7.0,46.0,37.0,1013.6,1012.1,8.0,5.0,27.4,30.9,NNE,ESE,W,No,860.6,0.0,119.6,218.4,1066.0,182.0,182.0,1196.0,962.0,26353.6,26314.6,208.0,130.0,712.4,803.4,0.0,152.26,278.04,1357.1,231.7,231.7,1522.6,1224.7,33550.16,33500.51,264.8,165.5,906.94,1022.79,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,188.6,32.2,32.2,211.6,170.2,4662.56,4655.66,36.8,23.0,126.04,142.14,344.4,58.8,58.8,386.4,310.8,8514.24,8501.64,67.2,42.0,230.16,259.56,287.0,287.0,1886.0,1517.0,41557.6,41496.1,328.0,205.0,1123.4,1266.9,49.0,322.0,259.0,7095.2,7084.7,56.0,35.0,191.8,216.3,322.0,259.0,7095.2,7084.7,56.0,35.0,191.8,216.3,1702.0,46625.6,46556.6,368.0,230.0,1260.4,1421.4,37503.2,37447.7,296.0,185.0,1013.8,1143.3,1025864.56,8108.8,5068.0,27772.64,31320.24,8096.8,5060.5,27731.54,31273.89,40.0,219.2,247.2,137.0,154.5,846.66,NNE-ESE,NNE-W,NNE-No,ESE-W,ESE-No,W-No,NNE-ESE-W,NNE-ESE-No,NNE-W-No,ESE-W-No,NNE-ESE-W-No
2137,19.0,35.2,0.0,4.6,8.4,33.0,7.0,9.0,60.0,34.0,1017.4,1014.7,8.0,5.0,25.6,32.5,E,SSE,SE,No,668.8,0.0,87.4,159.6,627.0,133.0,171.0,1140.0,646.0,19330.6,19279.3,152.0,95.0,486.4,617.5,0.0,161.92,295.68,1161.6,246.4,316.8,2112.0,1196.8,35812.48,35717.44,281.6,176.0,901.12,1144.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.64,151.8,32.2,41.4,276.0,156.4,4680.04,4667.62,36.8,23.0,117.76,149.5,277.2,58.8,75.6,504.0,285.6,8546.16,8523.48,67.2,42.0,215.04,273.0,231.0,297.0,1980.0,1122.0,33574.2,33485.1,264.0,165.0,844.8,1072.5,63.0,420.0,238.0,7121.8,7102.9,56.0,35.0,179.2,227.5,540.0,306.0,9156.6,9132.3,72.0,45.0,230.4,292.5,2040.0,61044.0,60882.0,480.0,300.0,1536.0,1950.0,34591.6,34499.8,272.0,170.0,870.4,1105.0,1032355.78,8139.2,5087.0,26045.44,33065.5,8117.6,5073.5,25976.32,32977.75,40.0,204.8,260.0,128.0,162.5,832.0,E-SSE,E-SE,E-No,SSE-SE,SSE-No,SE-No,E-SSE-SE,E-SSE-No,E-SE-No,SSE-SE-No,E-SSE-SE-No


From here the preprocessing steps could go on with feature scaling, categorical encoding, and so on.