# Adaptive Boosting Method

# Determinar se um cliente realizou a compra a partir de uma propaganda, utilizando o método Adaptive Boosting (AdaBoost)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the ad-campaign dataset from the CSV file in the working directory.
dados = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [5]:
# Preview the first rows of the freshly loaded table.
dados.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


A coluna User ID não é necessária para o modelo, pois é apenas um identificador do usuário.

In [6]:
# List the column labels of the table.
dados.columns

Index(['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

In [7]:
# Remove the User ID column: it is only a user identifier.
# errors='ignore' keeps this cell idempotent -- re-running it after the
# column is already gone no longer raises KeyError.
dados = dados.drop(columns='User ID', errors='ignore')

In [10]:
# Inspect the last 20 rows of the table.
dados.tail(20)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
380,Male,42,64000,0
381,Male,48,33000,1
382,Female,44,139000,1
383,Male,49,28000,1
384,Female,57,33000,1
385,Male,56,60000,1
386,Female,49,39000,1
387,Male,39,71000,0
388,Male,47,34000,1
389,Female,48,35000,1


In [8]:
# Preview the first rows to confirm that the User ID column is gone.
dados.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


Convertendo a variável categórica Gender para numérica

In [9]:
from sklearn.preprocessing import LabelEncoder

Instanciamos o objeto encoder

In [11]:
# Encoder used to turn the Gender strings into integer codes.
# NOTE(review): the variable name is misspelled ("enconder"); it is left
# unchanged here because a later cell refers to it by this name.
enconder = LabelEncoder()

O conteúdo da coluna Gender será transformado em valores numéricos

In [12]:
# Fit the encoder on the Gender column and replace the column with the
# resulting integer codes (column order is preserved by assign).
dados = dados.assign(Gender=enconder.fit_transform(dados['Gender']))

In [13]:
# Preview the first rows to check that Gender now holds integer codes.
dados.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


Padronizando as colunas "Age" e "EstimatedSalary" (reescalar colunas numéricas para uma escala padrão)

In [14]:
# Numeric columns that will be rescaled.
cols = [
    'Age',
    'EstimatedSalary',
]

In [15]:
from sklearn.preprocessing import StandardScaler

In [16]:
# Scaler instance used below to standardize the numeric columns.
sc = StandardScaler()

In [17]:
# Learn the scaling statistics from the selected columns, then overwrite
# those columns with their standardized values.
# NOTE(review): the scaler is fit on every row, including rows that are
# later assigned to the test split; fitting on the training split only
# would avoid leaking test-set information into preprocessing.
dados[cols] = sc.fit(dados[cols]).transform(dados[cols])

In [18]:
# Preview the first rows to check the rescaled Age and EstimatedSalary values.
dados.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,-1.781797,-1.490046,0
1,1,-0.253587,-1.460681,0
2,0,-1.113206,-0.78529,0
3,0,-1.017692,-0.374182,0
4,1,-1.781797,0.183751,0


Determinando as variáveis X e Y

In [19]:
# Target vector: the Purchased column.
Y = dados['Purchased'].values
# Feature matrix: every remaining column, as a NumPy array.
X = dados.drop(columns='Purchased').values

Separando em amostras de treino (75%) e teste (25%)

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
# Split features and target into training and test partitions.
X_treino, X_teste, Y_treino, Y_teste = train_test_split(
    X,
    Y,
    test_size=0.25,   # 25% of the rows are held out for evaluation
    random_state=0,   # fixed seed so the split is reproducible
)

Aplicando modelo AdaBoost

In [22]:
from sklearn.ensemble import AdaBoostClassifier

In [23]:
# AdaBoost classifier with library-default hyperparameters.
# random_state is pinned (same seed as the train/test split) so that a
# fresh Restart-and-Run-All reproduces the same fitted model and scores.
modelo = AdaBoostClassifier(random_state=0)

In [24]:
# Train the classifier on the training split.
modelo.fit(X_treino,Y_treino)

AdaBoostClassifier()

In [25]:
# Score on the training split (rows the model was fit on).
modelo.score(X_treino,Y_treino)

0.94

In [26]:
# Predicted labels for the held-out test rows; reused by the metric cells.
Y_previsto = modelo.predict(X_teste)

In [27]:
# Score on the held-out test split.
modelo.score(X_teste,Y_teste)

0.92

Gerando matriz de confusão

In [28]:
from sklearn.metrics import confusion_matrix

In [29]:
# Confusion matrix of true test labels versus model predictions.
cm = confusion_matrix(y_true=Y_teste, y_pred=Y_previsto)

In [30]:
# Display the matrix (scikit-learn convention: rows are true classes,
# columns are predicted classes).
cm

array([[64,  4],
       [ 4, 28]])

Mais métricas

In [31]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

In [32]:
# Fraction of test rows whose predicted label matches the true label.
acc = accuracy_score(y_true=Y_teste, y_pred=Y_previsto)

In [33]:
# Report accuracy as a percentage with two decimals.
print("Acuracia = {:0.2f}%".format(100 * acc))

Acuracia = 92.00%


In [34]:
# Precision on the test split, computed with scikit-learn's default settings.
prec = precision_score(y_true=Y_teste, y_pred=Y_previsto)

In [35]:
# Report precision as a percentage with two decimals.
print("Precisao = {:0.2f}%".format(100 * prec))

Precisao = 87.50%


In [36]:
# Recall on the test split, computed with scikit-learn's default settings.
rec = recall_score(y_true=Y_teste, y_pred=Y_previsto)

In [37]:
# Report recall as a percentage with two decimals.
print("Recall = {:0.2f}%".format(100 * rec))

Recall = 87.50%


In [38]:
# F1 score on the test split, computed with scikit-learn's default settings.
f1 = f1_score(y_true=Y_teste, y_pred=Y_previsto)

In [39]:
# Report the F1 score as a percentage with two decimals.
print("F1 = {:0.2f}%".format(100 * f1))

F1 = 87.50%
