<a href="https://colab.research.google.com/github/alexandergribenchenko/Data_Science_Toolkit/blob/main/Sklearn_Transformers/DS_TS_OW_01_Customized_Transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Customized transformers in Sklearn

In [None]:
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin

## FeatureSelector

### ---> Dataframe sample:

In [None]:
# Sample data: one text column, one integer column and one float column.
a = ['gato', 'perro', 'mico']
b = [1, 2, 3]
c = [11.11, 22.22, 33.33]

# Build the frame column by column; the result matches zipping the lists into rows.
df = pd.DataFrame({'aaa': a, 'bbb': b, 'ccc': c})
df

Unnamed: 0,aaa,bbb,ccc
0,gato,1,11.11
1,perro,2,22.22
2,mico,3,33.33


### ---> Class:

In [None]:
# Configuration for FeatureSelector: the column labels to keep.
params_FeatureSelector = {'feature_names': ['aaa', 'bbb']}
params_FeatureSelector

{'feature_names': ['aaa', 'bbb']}

In [None]:
class FeatureSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only a configured subset of columns.

    Parameters
    ----------
    params_FeatureSelector : dict
        Configuration mapping. Its ``'feature_names'`` entry holds the column
        labels that ``transform`` selects from the input.

    Notes
    -----
    The constructor argument is stored unchanged under its own name.
    ``BaseEstimator.get_params`` reads every ``__init__`` parameter back with
    ``getattr``, so storing only a derived attribute makes ``get_params()``,
    ``repr()`` and ``sklearn.base.clone()`` raise ``AttributeError``.
    As a consequence, a missing ``'feature_names'`` key is reported (as
    ``KeyError``) when the names are first read rather than at construction.
    """

    def __init__(self, params_FeatureSelector):
        # No validation or derivation here: keep the parameter as given.
        self.params_FeatureSelector = params_FeatureSelector

    @property
    def feature_names(self):
        """Column labels to keep, read from the stored configuration (read-only)."""
        return self.params_FeatureSelector['feature_names']

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` indexed by the configured column labels."""
        return X[self.feature_names]

In [None]:
# Build the selector from the parameter dict defined earlier in the notebook.
Transformer_FeatureSelector = FeatureSelector(params_FeatureSelector)

In [None]:
# Apply the selector to the sample frame; fit() is not called first because it only returns self.
df_transformed = Transformer_FeatureSelector.transform(df)
df_transformed

Unnamed: 0,aaa,bbb
0,gato,1
1,perro,2
2,mico,3


## TypeAssignator

### ---> Dataframe sample:

In [None]:
# Sample data where every value, including the numeric-looking ones, is a string.
a = ['gato', 'perro', 'mico']
b = ['1', '2', '3']
c = ['11.11', '22.22', '33.33']
d = ['11.11', '22.22', '33.33']

# Build the frame column by column; the result matches zipping the lists into rows.
df = pd.DataFrame({'aaa': a, 'bbb': b, 'ccc': c, 'ddd': d})
df

Unnamed: 0,aaa,bbb,ccc,ddd
0,gato,1,11.11,11.11
1,perro,2,22.22,22.22
2,mico,3,33.33,33.33


In [None]:
# Every column was built from Python strings, so inspect the dtypes before casting.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   aaa     3 non-null      object
 1   bbb     3 non-null      object
 2   ccc     3 non-null      object
 3   ddd     3 non-null      object
dtypes: object(4)
memory usage: 224.0+ bytes


### ---> Class:

In [None]:
# Configuration for TypeAssignator: target type group -> columns to cast.
params_TypeAssignator = {
    'cols_type_int': ['bbb'],
    'cols_type_float': ['ccc', 'ddd'],
    'cols_type_str': ['aaa'],
}
params_TypeAssignator

{'cols_type_int': ['bbb'],
 'cols_type_float': ['ccc', 'ddd'],
 'cols_type_str': ['aaa']}

In [None]:
class TypeAssignator(BaseEstimator, TransformerMixin):
    """Transformer that casts groups of columns to ``int``, ``float`` or ``str``.

    Parameters
    ----------
    params_TypeAssignator : dict
        Maps a group key to the columns to cast. The recognised keys are
        ``'cols_type_int'``, ``'cols_type_float'`` and ``'cols_type_str'``;
        any other key is ignored.
    """

    def __init__(self, params_TypeAssignator):
        self.params_TypeAssignator = params_TypeAssignator

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the configured columns cast.

        Groups are processed in the iteration order of the parameter dict.
        The input ``X`` is copied first and is not modified.
        """
        dtype_by_group = {
            'cols_type_int': int,
            'cols_type_float': float,
            'cols_type_str': str,
        }
        casted = X.copy()
        for group, columns in self.params_TypeAssignator.items():
            target = dtype_by_group.get(group)
            if target is None:
                # Unrecognised group keys are skipped without error.
                continue
            casted[columns] = casted[columns].astype(target)
        return casted

In [None]:
# Build the type caster from the parameter dict defined earlier in the notebook.
Transformer_TypeAssignator = TypeAssignator(params_TypeAssignator)

In [None]:
# Show the parameter dict held by the transformer instance.
Transformer_TypeAssignator.params_TypeAssignator

{'cols_type_int': ['bbb'],
 'cols_type_float': ['ccc', 'ddd'],
 'cols_type_str': ['aaa']}

In [None]:
# Cast the configured columns; transform() works on a copy, so df itself is left unchanged.
df_transformed = Transformer_TypeAssignator.transform(df)
df_transformed

Unnamed: 0,aaa,bbb,ccc,ddd
0,gato,1,11.11,11.11
1,perro,2,22.22,22.22
2,mico,3,33.33,33.33


In [None]:
# Check the dtypes of the transformed frame after the cast.
df_transformed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   aaa     3 non-null      object 
 1   bbb     3 non-null      int64  
 2   ccc     3 non-null      float64
 3   ddd     3 non-null      float64
dtypes: float64(2), int64(1), object(1)
memory usage: 224.0+ bytes


# End (cleaned up)

In [None]:
# Define the module-level variables cat_0 ... cat_4 with values 0, 2, 4, 6, 8.
# Assigning through globals() creates the same names without compiling
# code from a string with exec().
for k in range(5):
    globals()[f'cat_{k}'] = k * 2

In [None]:
# # Goes into the "Python curiosities" notes
# for key in params_TypeAssignator.keys():
#   print(key)
#   exec(f'print(params_TypeAssignator[key])')

#   class TypeAssignator(BaseEstimator, TransformerMixin):
  
# # Understand the difference versus the call (case: key)
#   def __init__(self, params_TypeAssignator):
#     for key in params_TypeAssignator.keys():
#       exec(f'self.{key} = params_TypeAssignator[key]')