# Parameters

In [21]:
# File path
FILENAME_TRAIN = './datasets/mobile_classifier/train.csv'
FILENAME_TEST = './datasets/mobile_classifier/test.csv'

# Column when fetures start 
FEATURES_INIT = 0
# Column when fetures end 
FEATURES_END = 20

# Index of columns that need LabelEnconder 
FEATURES_LABELENCODER = []
FEATURES_LABELENCODER_OBJ = []

# Columns that need LabelEnconder 
FEATURES_ONEHOT = []
FEATURES_ONEHOT_OBJ = []

# Target column
TARGET = 20

# UTILS
RANDOM_STATE = 1

# Pre-processing

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV # sklearn.grid_search

def pre_processing(is_test=True):
    
    # Importar el data set
    dataset = pd.read_csv(FILENAME_TEST if is_test else FILENAME_TRAIN)

    X = dataset.iloc[:, FEATURES_INIT:FEATURES_END].values
    if not is_test:
        y = dataset.iloc[:, TARGET].values

    # Codificar datos categóricos
    for column in FEATURES_LABELENCODER:
        labelencoder = LabelEncoder()
        X[:, column] = labelencoder.fit_transform(X[:, column])
        FEATURES_LABELENCODER_OBJ.append(labelencoder) 


    for column in FEATURES_ONEHOT:
        transformer = ColumnTransformer(
            transformers=[
                ("Tranform_{}".format(column),     # Un nombre de la transformación
                 OneHotEncoder(categories='auto'), # La clase a la que transformar
                 [column]                          # Las columnas a transformar.
                 )
            ], remainder='passthrough'
        )

        X = transformer.fit_transform(X)
        X = X[:, 1:]
        FEATURES_ONEHOT_OBJ.append(transformer) 

        
    # Escalado de variables
    scaler_x = StandardScaler()
    X = scaler_x.fit_transform(X)
    
    print("Preprocession successful :)")
   
    if is_test:
        return X
    else:
        # Dividir el data set en conjunto de entrenamiento y conjunto de testing
        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = RANDOM_STATE)
        return train_test_split(X, y, test_size = 0.2, random_state = RANDOM_STATE)
    

# PREPROCESSING
X_train, X_test, y_train, y_test = pre_processing(is_test=False)

Preprocession successful :)


# Model
The follows blocks must set two var

    	classifier = Estimator like any sklearn.ensemble or KerasClassifier (ANN)
    	parameters = Dictionary of params to optimizations 

In [23]:
# ANN Example

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import cross_val_score

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier


def build_classifier(optimizer, dropout, kernel_initializer, init_units):
    classifier = Sequential()
    classifier.add(Dense(units = 11, kernel_initializer = kernel_initializer,  activation = "relu", input_dim = 11))
    classifier.add(Dropout(dropout))
    classifier.add(Dense(units = init_units, kernel_initializer = kernel_initializer,  activation = "relu"))
    classifier.add(Dropout(dropout))
    classifier.add(Dense(units = 1, kernel_initializer = kernel_initializer,  activation = "sigmoid"))
    classifier.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ["accuracy"])
    return classifier

classifier = KerasClassifier(build_fn = build_classifier, verbose=0)
parameters = {
    'batch_size' : [32],
    'epochs' : [100], 
    'optimizer' : ['nadam'],
    'dropout': [0.1],
    'kernel_initializer': ['glorot_uniform'],
    'init_units': [11]
}

In [24]:
# RANDOMFOREST Example
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()
parameters = {
    'n_estimators': [100, 300, 500],
    'max_depth': [None, 3, 10],
    'random_state': [RANDOM_STATE]
}

# Training

In [25]:
grid_search = GridSearchCV(estimator = classifier, 
                           param_grid = parameters, 
                           scoring = 'accuracy', 
                           cv = 10,
                           n_jobs=-1)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
best_model = grid_search.best_estimator_ 

print("Best parameter:", best_parameters)
print("Best Accuracy:", best_accuracy)

Best parameter: {'max_depth': None, 'n_estimators': 500, 'random_state': 1}
Best Accuracy: 0.8825


# Testing
Validating the best model with the test file (if there is)

In [26]:
X_val = pre_processing(is_test=True)

Preprocession successful :)


In [27]:
y_pred = best_model.predict(X_val)
print(y_pred)

[2 0 2 3 0 0 1 3 0 1 1 2 2 3 1 2 2 0 2 0 3 2 2 1 1 2 1 0 0 1 2 0 3 1 3 2 2
 0 0 0 3 0 0 0 0 1 0 3 3 0 0 1 2 0 2 1 3 3 0 0 1 0 0 0 2 2 2 2 2 3 0 2 0 2
 3 1 1 2 0 2 0 2 2 2 0 2 0 0 0 1 1 2 0 3 0 0 3 1 2 1 0 3 0 3 0 1 3 1 0 1 1
 2 1 1 2 1 0 3 3 2 0 0 1 0 2 2 2 0 3 3 0 3 1 0 3 2 0 3 2 2 1 0 0 3 0 3 3 0
 3 0 0 2 2 1 2 1 3 0 0 0 2 1 3 3 3 0 1 1 3 0 0 0 2 2 1 2 1 3 0 1 3 1 1 3 1
 3 3 3 2 1 2 3 2 2 3 2 2 1 3 0 0 0 2 1 2 2 3 3 1 0 1 3 0 3 0 3 1 2 1 0 1 1
 2 2 1 3 2 0 3 0 3 1 1 2 0 3 1 0 1 0 3 0 3 0 1 1 2 0 2 0 0 2 1 0 2 1 3 2 1
 3 1 1 0 0 3 1 3 0 2 0 2 3 2 1 1 3 2 3 1 0 2 2 1 2 3 2 1 1 0 0 3 1 2 2 3 2
 2 0 0 2 0 0 3 3 2 3 1 1 1 3 3 0 0 2 3 0 3 3 0 1 1 2 1 3 3 2 0 1 0 0 0 3 3
 0 0 0 0 0 0 2 0 1 0 0 2 3 2 2 2 1 3 0 1 2 0 3 0 3 2 3 3 0 2 2 2 1 0 1 0 3
 2 0 2 0 2 2 0 1 2 3 1 0 3 2 3 0 0 3 3 0 0 1 1 0 1 0 3 2 0 0 0 1 3 1 0 0 2
 3 1 1 3 2 1 1 1 3 1 2 0 1 1 0 2 0 1 2 3 1 3 3 1 3 0 3 0 1 1 0 0 0 1 2 2 1
 1 3 2 2 0 0 0 0 1 1 0 0 2 0 3 3 0 3 2 0 3 0 1 3 3 3 1 2 0 2 3 3 2 2 3 2 1
 3 0 1 3 3 0 3 0 2 2 3 3 