<a href="https://colab.research.google.com/github/alexandergribenchenko/Data_Science_Self_Study/blob/main/Pipelines_Customized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **01. Carga dataset base**

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Colab-only step: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Folder and file name of the input CSV. `path` keeps its trailing '/' because
# the two strings are joined with plain `+` when the file is read.
path = '/content/drive/MyDrive/01_Code/16_Pipelines/'
file_name = 'titanic_train.csv'

In [4]:
# nrows=10 limits the read to the first 10 data rows of the CSV.
df = pd.read_csv(path + file_name, sep=',', nrows=10)
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [5]:
# Print column dtypes and non-null counts for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  10 non-null     int64  
 1   Survived     10 non-null     int64  
 2   Pclass       10 non-null     int64  
 3   Name         10 non-null     object 
 4   Sex          10 non-null     object 
 5   Age          9 non-null      float64
 6   SibSp        10 non-null     int64  
 7   Parch        10 non-null     int64  
 8   Ticket       10 non-null     object 
 9   Fare         10 non-null     float64
 10  Cabin        3 non-null      object 
 11  Embarked     10 non-null     object 
dtypes: float64(2), int64(5), object(5)
memory usage: 1.1+ KB


# **02. Class FeatureSelector**

In [6]:
from sklearn.base import BaseEstimator, TransformerMixin

In [7]:
class FeatureSelector(BaseEstimator, TransformerMixin):
    """Transformer that returns only the columns named at construction time.

    Parameters
    ----------
    feature_names : column label or list of column labels
        Used as ``X[feature_names]`` inside ``transform``.
    """

    def __init__(self, feature_names):
        # Stored under the same name as the constructor argument:
        # BaseEstimator.get_params() (and therefore clone(), set_params() and
        # the estimator repr) reads each parameter via getattr(self, <arg name>).
        self.feature_names = feature_names

    @property
    def _feature_names(self):
        """Alias kept for code that still reads the former private attribute."""
        return self.feature_names

    @_feature_names.setter
    def _feature_names(self, value):
        self.feature_names = value

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Return ``X[self.feature_names]``; ``y`` is accepted and ignored."""
        return X[self.feature_names]

In [8]:
# Column labels passed to FeatureSelector in the following cells.
columns_selected = ['PassengerId', 'Pclass', 'Sex', 'Age', 'Fare','Survived']

In [9]:
# Build the selector. FeatureSelector.fit only returns self, so transform can be called without fitting.
Transformer_FeatureSelector = FeatureSelector(feature_names=columns_selected)

In [10]:
# Apply the selection: the result contains only the columns listed in columns_selected.
df_transformed = Transformer_FeatureSelector.transform(df)
df_transformed

Unnamed: 0,PassengerId,Pclass,Sex,Age,Fare,Survived
0,1,3,male,22.0,7.25,0
1,2,1,female,38.0,71.2833,1
2,3,3,female,26.0,7.925,1
3,4,1,female,35.0,53.1,1
4,5,3,male,35.0,8.05,0
5,6,3,male,,8.4583,0
6,7,1,male,54.0,51.8625,0
7,8,3,male,2.0,21.075,0
8,9,3,female,27.0,11.1333,1
9,10,2,female,14.0,30.0708,1


In [11]:
# Display df again to check that it still has every original column after the selection above.
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


# **03. Class LogScaler**

In [12]:
class LogScaler(BaseEstimator, TransformerMixin):
    """Replace selected DataFrame columns with their natural logarithm.

    Parameters
    ----------
    add_log_scaler : bool, default=True
        Switch for the transformation. When false, ``transform`` returns its
        input unchanged.
    feature_names : str, iterable of column selections, or None, default=None
        Columns to transform. Each element is used as ``X[element]``, so an
        element may be a single label or a list of labels. A bare string is
        treated as one label. ``None`` means no column is transformed.
    """

    def __init__(self, add_log_scaler=True, feature_names=None):
        self.add_log_scaler = add_log_scaler
        self.feature_names = feature_names

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` with ``np.log`` applied to the configured columns.

        ``X`` itself is returned (no copy) when the transformer is switched
        off or ``feature_names`` is None; otherwise a copy is modified and
        returned, leaving ``X`` untouched. ``y`` is accepted and ignored.
        """
        if not self.add_log_scaler or self.feature_names is None:
            return X

        # Iterating a bare string would yield single characters, not a label.
        if isinstance(self.feature_names, str):
            selections = [self.feature_names]
        else:
            selections = self.feature_names

        salida = X.copy()
        for cols in selections:
            salida[cols] = np.log(salida[cols])
        return salida

In [14]:
# Log-transform the 'Fare' and 'Age' columns (flat list: one label per entry).
Transformer_LogScaler = LogScaler(feature_names=['Fare', 'Age'])

In [15]:
# LogScaler.transform works on X.copy(), so df itself is not modified.
# NOTE: this rebinds df_transformed, replacing the FeatureSelector result from the earlier cell.
df_transformed = Transformer_LogScaler.transform(df)
df_transformed

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,3.091042,1,0,A/5 21171,1.981001,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,3.637586,1,0,PC 17599,4.266662,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,3.258097,0,0,STON/O2. 3101282,2.070022,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,3.555348,1,0,113803,3.972177,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,3.555348,0,0,373450,2.085672,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,2.135148,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,3.988984,0,0,17463,3.948596,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,0.693147,3,1,349909,3.048088,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,3.295837,0,2,347742,2.409941,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,2.639057,1,0,237736,3.403555,,C
