# Pipelines

In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline 
import types

In [2]:
from sklearn.preprocessing import FunctionTransformer ,OneHotEncoder, LabelEncoder,StandardScaler,MinMaxScaler
import category_encoders as ce

In [3]:
import transformers

In [4]:
# Load the raw Google stock-price training data. The path is relative, so it
# resolves against the notebook's working directory.
df = pd.read_csv('../data/Google_Stock_Price_Train.csv')

In [5]:
# Categorical columns: these are the ones handed to the one-hot encoder.
categories_features = [
    'Volatility_group',
    'Day_of_week',
]

# Columns kept by the column-selector step of the combined pipeline.
# The spelling 'avarage' is intentional here: the names must match the
# columns shown in the feature-engineering output exactly.
features_for_model = [
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Volatility',
    'Day_of_week',
    'Volatility_group',
    'Close_30_moving_avarage',
    'Is_close_below_30_moving_avarage',
    'Is_close_above_30_moving_avarage',
    'Days_close_below_30_moving_avarage',
    'Days_close_above_30_moving_avarage',
]

In [6]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [7]:
# Feature-engineering stage: the ordered list of steps is supplied by the
# project-local `transformers` module.
stock_pipeline = Pipeline(steps=transformers.get_transformer_func_list())

# Encoding stage: one-hot encode the categorical columns. With
# use_cat_names=True the generated columns carry the category value in their
# name (e.g. Volatility_group_Low).
categories_pipe = Pipeline(
    steps=[
        (
            'onehot',
            ce.OneHotEncoder(cols=categories_features, use_cat_names=True),
        ),
    ]
)

In [8]:
# End-to-end preprocessing, applied in this order:
#   1. functionaltransformers - the feature-engineering pipeline
#   2. column_selector        - keep only the columns in features_for_model
#   3. onehotencoding         - one-hot encode the categorical columns
#   4. scaling                - project-local scaler configured with 'minmax'
combPipe = Pipeline(
    steps=[
        ('functionaltransformers', stock_pipeline),
        (
            'column_selector',
            FunctionTransformer(
                func=transformers.column_selector,
                kw_args={'cols': features_for_model},
            ),
        ),
        ('onehotencoding', categories_pipe),
        ('scaling', transformers.CustomTransformerScaler(method='minmax')),
    ]
)

In [9]:
# Fit every stage on df and display the transformed frame. The result is only
# displayed here; it is not stored in a variable.
# NOTE(review): Close_30_moving_avarage is still empty (NaN) in the first rows
# of the displayed output - confirm the downstream model handles missing values.
combPipe.fit_transform(df)

Unnamed: 0,Open,High,Low,Close,Volume,Volatility,Day_of_week_1.0,Day_of_week_2.0,Day_of_week_3.0,Day_of_week_4.0,Day_of_week_0.0,Volatility_group_Low,Volatility_group_Medium,Volatility_group_High,Close_30_moving_avarage,Is_close_below_30_moving_avarage,Is_close_above_30_moving_avarage,Days_close_below_30_moving_avarage,Days_close_above_30_moving_avarage
0,0.085814,0.096401,0.090449,0.237573,0.295258,0.115584,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,1.0,0.000000,0.014286
1,0.097012,0.098344,0.098235,0.241514,0.229936,0.056738,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.028571
2,0.094334,0.092517,0.094086,0.228781,0.263612,0.038911,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.042857
3,0.091562,0.088819,0.088006,0.216419,0.216179,0.062488,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.057143
4,0.079842,0.076718,0.061070,0.178548,0.467797,0.210849,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,,0.0,1.0,0.000000,0.071429
5,0.064328,0.064448,0.056978,0.179472,0.353068,0.126318,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,1.0,0.000000,0.085714
6,0.058542,0.060340,0.060956,0.183358,0.192627,0.043895,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.100000
7,0.065686,0.063589,0.066033,0.188416,0.150441,0.025877,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.114286
8,0.061091,0.058061,0.060899,0.182021,0.185178,0.021085,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.128571
9,0.066393,0.062749,0.065256,0.186955,0.153180,0.025110,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.000000,0.142857


In [104]:
# Run only the feature-engineering stage to inspect its output before column
# selection, encoding and scaling.
# NOTE(review): stock_pipeline is the same object used as the first step of
# combPipe, so this call presumably re-fits that step too - confirm intended.
transformed_df = stock_pipeline.fit_transform(df)
transformed_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Volatility,Day_of_week,Volatility_group,Close_30_moving_avarage,Is_close_below_30_moving_avarage,Is_close_above_30_moving_avarage,Days_close_below_30_moving_avarage,Days_close_above_30_moving_avarage
0,2012-01-03,325.25,332.83,324.97,663.59,7380500.0,7.86,1,Medium,,0,1,0,1
1,2012-01-04,331.27,333.87,329.08,666.45,5749400.0,4.79,2,Low,,0,1,0,2
2,2012-01-05,329.83,330.75,326.89,657.21,6590300.0,3.86,3,Low,,0,1,0,3
3,2012-01-06,328.34,328.77,323.68,648.24,5405900.0,5.09,4,Low,,0,1,0,4
4,2012-01-09,322.04,322.29,309.46,620.76,11688800.0,12.83,0,High,,0,1,0,5


In [144]:
# Column names of the feature-engineered frame, skipping the first column
# ('Date' in the preview above). The displayed result matches the
# features_for_model list.
transformed_df.columns[1:]

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Volatility', 'Day_of_week',
       'Volatility_group', 'Close_30_moving_avarage',
       'Is_close_below_30_moving_avarage', 'Is_close_above_30_moving_avarage',
       'Days_close_below_30_moving_avarage',
       'Days_close_above_30_moving_avarage'],
      dtype='object')