# Parameters

In [1]:
# Path of the CSV file that is loaded as the data set
FILENAME = './datasets/winequality_classifier.csv'

# Index of the first feature column (start of the column slice, inclusive)
FEATURES_INIT = 0
# Index where the feature columns end (stop of the column slice, exclusive)
FEATURES_END = 12

# Indexes (within the feature matrix) of the columns that need a LabelEncoder
FEATURES_LABELENCODER = [0]
# Filled during pre-processing with one fitted LabelEncoder per column listed above
FEATURES_LABELENCODER_OBJ = []

# Indexes (within the feature matrix) of the columns that need a OneHotEncoder
FEATURES_ONEHOT = []
# Filled during pre-processing with one fitted transformer per column listed above
FEATURES_ONEHOT_OBJ = []

# Index of the target column
TARGET = 12

# UTILS
# Seed used for the train/test split and for the random-forest parameter grid
RANDOM_STATE = 1

# Pre-processing

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV # sklearn.grid_search

# Load the data set
dataset = pd.read_csv(FILENAME)

# Drop every row that contains NaN or an infinite value
dataset = dataset.replace([np.inf, -np.inf], np.nan).dropna()

X = dataset.iloc[:, FEATURES_INIT:FEATURES_END].values
y = dataset.iloc[:, TARGET].values

# Empty the encoder registries in place so that re-running this cell does not
# accumulate stale encoders left over from a previous execution.
FEATURES_LABELENCODER_OBJ.clear()
FEATURES_ONEHOT_OBJ.clear()

# Encode categorical columns as integer labels (one fitted encoder per column)
for column in FEATURES_LABELENCODER:
    labelencoder = LabelEncoder()
    X[:, column] = labelencoder.fit_transform(X[:, column])
    FEATURES_LABELENCODER_OBJ.append(labelencoder)


# One-hot encode categorical columns (one fitted transformer per column).
# ColumnTransformer puts the encoded columns first and the passthrough columns
# after them, so every pass moves the remaining features. column_position[i] is
# the current index in X of original feature column i (None once it is encoded).
column_position = list(range(X.shape[1]))
for column in FEATURES_ONEHOT:
    current = column_position[column]
    transformer = ColumnTransformer(
        transformers=[
            ("Tranform_{}".format(column),
             OneHotEncoder(categories='auto'),
             [current]                         # The column to transform.
             )
        ], remainder='passthrough'
    )

    width_before = X.shape[1]
    X = transformer.fit_transform(X)
    # Drop the first dummy column to avoid the dummy-variable trap
    X = X[:, 1:]
    FEATURES_ONEHOT_OBJ.append(transformer)

    # Dummy columns that remain at the front of X after the drop above
    kept_dummies = X.shape[1] - (width_before - 1)
    column_position = [
        None if (position is None or position == current)
        else kept_dummies + position - (1 if position > current else 0)
        for position in column_position
    ]


# Split the data set into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = RANDOM_STATE)

# Feature scaling: fit on the training set only, then apply to the test set
scaler_x = StandardScaler()
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)




# Model
The following blocks must set two variables:

    	classifier = An estimator, e.g. any sklearn.ensemble model or a KerasClassifier (ANN)
    	parameters = Dictionary of hyperparameters to optimize

In [15]:
# ANN Example

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import cross_val_score

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier


def build_classifier(optimizer, dropout, kernel_initializer, init_units, input_dim=None):
    """Build and compile a two-hidden-layer network with a single sigmoid output.

    Parameters
    ----------
    optimizer : optimizer passed to ``compile``.
    dropout : dropout rate applied after each hidden layer.
    kernel_initializer : initializer used by every Dense layer.
    init_units : number of units of the second hidden layer.
    input_dim : number of input features. When omitted it is read from
        ``X_train`` so the network always matches the pre-processed feature
        matrix instead of a hard-coded width.

    Returns
    -------
    The compiled Sequential model (binary cross-entropy loss, accuracy metric).
    """
    if input_dim is None:
        # The feature count depends on the parameter cell and on the encoders
        input_dim = X_train.shape[1]

    classifier = Sequential()
    classifier.add(Dense(units = 11, kernel_initializer = kernel_initializer,  activation = "relu", input_dim = input_dim))
    classifier.add(Dropout(dropout))
    classifier.add(Dense(units = init_units, kernel_initializer = kernel_initializer,  activation = "relu"))
    classifier.add(Dropout(dropout))
    classifier.add(Dense(units = 1, kernel_initializer = kernel_initializer,  activation = "sigmoid"))
    classifier.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ["accuracy"])
    return classifier

# Wrap the Keras builder so it can be used as a scikit-learn estimator
classifier = KerasClassifier(build_fn=build_classifier, verbose=0)

# Candidate values per hyperparameter; each key holds the list of values to try
parameters = dict(
    batch_size=[32],
    epochs=[100],
    optimizer=['nadam'],
    dropout=[0.1],
    kernel_initializer=['glorot_uniform'],
    init_units=[11],
)

In [3]:
# RANDOMFOREST Example
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()

# Number of trees in the random forest: 200, 400, ..., 2000.
# The grid is searched exhaustively, so it is kept coarse: a 200-step axis here
# and a 50-step depth axis gave 367,200 combinations (3,672,000 fits at cv=10).
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 'auto' is no longer accepted by recent scikit-learn releases and was an
# alias of 'sqrt' for classifiers, so it only duplicated work.
max_features = ['sqrt', 'log2']
# Maximum number of levels in a tree: 10, 20, ..., 110, plus None (unlimited)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Parameter grid: 10 * 2 * 12 * 3 * 3 * 2 = 4,320 combinations
parameters = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap,
               'random_state': [RANDOM_STATE]}

# Training

In [None]:
# Exhaustive search over `parameters` with 10-fold cross-validation, scored by
# accuracy and run on every available core.
grid_search = GridSearchCV(classifier, parameters, scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Keep the winning configuration and its mean cross-validated score
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_

print("Best parameter:", best_parameters)
print("Best Accuracy:", best_accuracy)

