#**KerasClassifier**

In [None]:
!pip install scikeras

In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import backend as k
import matplotlib.pyplot as plt
from keras.models import Sequential 
from keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder

In [13]:
df = pd.read_csv("advertising.csv")
df.head()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
0,68.95,35,61833.9,256.09,Cloned 5thgeneration orchestration,Wrightburgh,0,Tunisia,2016-03-27 00:53:11,0
1,80.23,31,68441.85,193.77,Monitored national standardization,West Jodi,1,Nauru,2016-04-04 01:39:02,0
2,69.47,26,59785.94,236.5,Organic bottom-line service-desk,Davidton,0,San Marino,2016-03-13 20:35:42,0
3,74.15,29,54806.18,245.89,Triple-buffered reciprocal time-frame,West Terrifurt,1,Italy,2016-01-10 02:31:19,0
4,68.37,35,73889.99,225.58,Robust logistical utilization,South Manuel,0,Iceland,2016-06-03 03:36:18,0


**Verificando se há dados nulos:**

In [10]:
df.isna().sum()

Daily Time Spent on Site    0
Age                         0
Area Income                 0
Daily Internet Usage        0
Ad Topic Line               0
City                        0
Male                        0
Country                     0
Timestamp                   0
Clicked on Ad               0
dtype: int64

In [21]:
df.head()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
0,0.249267,-0.114905,0.509691,1.73403,-1.415086,1.700304,0.962695,1.424572,-0.209578,0
1,0.961132,-0.570425,1.00253,0.313805,-0.122976,1.492546,-1.03875,0.451875,-0.088335,0
2,0.282083,-1.139826,0.356949,1.287589,0.230363,-1.344415,0.962695,0.981136,-0.458994,0
3,0.577432,-0.798185,-0.014456,1.50158,1.397766,1.621499,-1.03875,-0.177517,-1.53633,0
4,0.212664,-0.114905,1.408868,1.038731,0.923184,1.141508,0.962695,-0.277648,0.926648,0


**Transformando as Features categóricas em numéricas - OneHotEnconder - Features de dados binários:**

In [15]:
#"Male"
ohe =  OneHotEncoder(handle_unknown='ignore')
ohe.fit(df[["Male"]])

df["Male"] = ohe.transform(df[["Male"]]).toarray()

**Transformando as Features categóricas em numéricas - OrdinalEncoder() - Feature com mais de 2 classes.**

In [17]:
# "Ad Topic Line", "City", "Country", "Timestamp"
oe =  OrdinalEncoder()
oe.fit(df[["Ad Topic Line", "City", "Country", "Timestamp"]])

df[["Ad Topic Line", "City", "Country", "Timestamp"]] = oe.transform(df[["Ad Topic Line", "City", "Country", "Timestamp"]])

**Vamos escalonar todas as Features, pois a amplitude entre os dados está muito grande, transformar todas com média próximo de zero e desvio padrão próximo de 1**

In [19]:
df.columns

Index(['Daily Time Spent on Site', 'Age', 'Area Income',
       'Daily Internet Usage', 'Ad Topic Line', 'City', 'Male', 'Country',
       'Timestamp', 'Clicked on Ad'],
      dtype='object')

In [20]:
ssc = StandardScaler()

ssc.fit(df[['Daily Time Spent on Site', 'Age', 'Area Income','Daily Internet Usage', 'Ad Topic Line', 'City', 'Male', 'Country','Timestamp']])
h= ssc.transform(df[['Daily Time Spent on Site', 'Age', 'Area Income','Daily Internet Usage', 'Ad Topic Line', 'City', 'Male', 'Country','Timestamp']])
df[['Daily Time Spent on Site', 'Age', 'Area Income', 'Daily Internet Usage', 'Ad Topic Line', 'City', 'Male', 'Country','Timestamp']] = h

In [None]:
df.columns

Index(['city', 'area', 'rooms', 'bathroom', 'parking spaces', 'floor',
       'animal', 'furniture', 'hoa (R$)', 'rent amount (R$)',
       'property tax (R$)', 'fire insurance (R$)', 'total (R$)'],
      dtype='object')

**Separando os dados em teste e treino:**

In [22]:
X = df.drop(columns=["Clicked on Ad"])
y = df["Clicked on Ad"]

In [23]:
def criarRede(optimizer='rmsprop', init='glorot_uniform'):
    k.clear_session()
    rn = Sequential([
                #1ª camada oculta
               tf.keras.layers.Dense(units=5, activation = "relu", kernel_initializer = "random_uniform", input_dim=9), # input_dim = nº de colunas
               tf.keras.layers.Dropout(0.2),
               #2ª camada oculta
               tf.keras.layers.Dense(units=5, activation = "relu", kernel_initializer = "random_uniform"),
               tf.keras.layers.Dropout(0.2), #Dropout = % de colunas zeradas para não dar overfitting! Recomendável entre 0.2 e 0.3
               #3ª camada oculta
               tf.keras.layers.Dense(units=5, activation = "relu", kernel_initializer = "random_uniform"),
               tf.keras.layers.Dropout(0.2), #Dropout = % de colunas zeradas para não dar overfitting! Recomendável entre 0.2 e 0.3
               #4º camada oculta
               tf.keras.layers.Dense(units=5, activation = "relu", kernel_initializer = "random_uniform"),
               tf.keras.layers.Dropout(0.2), #Dropout = % de colunas zeradas para não dar overfitting! Recomendável entre 0.2 e 0.3
               #Camada de saída
               tf.keras.layers.Dense(units= 1, activation = 'sigmoid')])
    

    rn.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ['binary_accuracy'])


    return rn

In [24]:
rn = KerasClassifier(model=criarRede)
#============================================================================================
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = [50, 100, 150]
batches = [5, 10, 20]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, model__init=init)

#================================================================================
grid_search = GridSearchCV(estimator=rn,
                           param_grid=param_grid,
                           scoring="accuracy",
                           cv=5)

grid_search_bc = grid_search.fit(X, y)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/15

**Salvar o modelo:**

In [25]:
import pickle

In [26]:
with open("grid_search_bc.pkl", "wb") as f:
    pickle.dump(grid_search_bc, f)

In [None]:
with open("grid_search_bc.pkl", "rb") as f:
    
    grid_search_bc = pickle.load(f)

In [27]:
grid_search_bc.best_params_

{'batch_size': 20, 'epochs': 50, 'model__init': 'normal', 'optimizer': 'adam'}

**Score:**

In [28]:
grid_search_bc.best_score_

0.9710000000000001

In [None]:
y_pred = grid_search_bc.predict(X)



In [None]:
tabela = pd.DataFrame()
tabela["y real"] = y
tabela["y preditivo"] = y_pred
tabela.head()

Unnamed: 0,y real,y preditivo
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [None]:
lista = []
for c in range(tabela.shape[0]):
  if tabela.loc[[c]].values[0][2] < tabela.loc[[c]].values[0][1]:
      valor = (tabela.loc[[c]].values[0][2] / tabela.loc[[c]].values[0][1])*100
      lista.append(valor)
  elif tabela.loc[[c]].values[0][2] > tabela.loc[[c]].values[0][1]:
      valor = (tabela.loc[[c]].values[0][1] / tabela.loc[[c]].values[0][2])*100
      lista.append(valor)
media = np.mean(lista)
print(f"Teve uma média de acerto de {media:.2f}%")

Teve uma média de acerto de 91.27%


In [None]:
plt.figure(figsize=(40,8))
x_ax = range(len(y))
plt.plot(x_ax, y, label="Valores reais")
plt.plot(x_ax, y_pred, label="Valores preditivos")
plt.title("Comparação dos valores reais e preditivos")
plt.legend()
plt.show();