# Classificação com Rede Neural 

## Importações

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [3]:
spam = pd.read_csv("spam.csv")
spam.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


## Categorização de Rótulos e Separação das Variáveis

In [4]:
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(spam['Category'])
print(y)

[0 0 1 ... 0 0 0]


In [5]:
mensagens = spam['Message'].values

In [6]:
X_train, X_test, y_train, y_test = train_test_split(mensagens, y, test_size=0.3)
print(X_train)

['Hope you enjoyed your new content. text stop to 61610 to unsubscribe. help:08712400602450p Provided by tones2you.co.uk'
 'I hope you arnt pissed off but id would really like to see you tomorrow. Love me xxxxxxxxxxxxxX'
 'How is your schedule next week? I am out of town this weekend.' ...
 "&lt;#&gt; , that's all? Guess that's easy enough"
 'Ok.ok ok..then..whats ur todays plan'
 'YOUR CHANCE TO BE ON A REALITY FANTASY SHOW call now = 08707509020 Just 20p per min NTT Ltd, PO Box 1327 Croydon CR9 5WB 0870 is a national = rate call']


## Vetorização

In [7]:
vetorizador = CountVectorizer()
vetorizador.fit(X_train)
X_train = vetorizador.transform(X_train)
X_test = vetorizador.transform(X_test)

In [8]:
print(X_train.toarray())

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [13]:
# Mostrar todo vetor
np.set_printoptions(threshold=np.inf)

In [14]:
X_train.toarray()[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [15]:
print(X_train.shape[1])

7297


## Criação e Treinamento do Modelo

In [16]:
modelo = Sequential()
modelo.add(Dense(units=10,activation="relu",input_dim=X_train.shape[1])) # camada de entrada
modelo.add(Dropout(0.1))
modelo.add(Dense(units=8,activation="relu")) # camada oculta
modelo.add(Dropout(0.1))
modelo.add(Dense(units=1,activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
modelo.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])
modelo.summary() # da estrutura da rede neural

Sobre alguns parâmtros, vale salientar a importância de:
- Epoch: vão ser passados todos os dados de treino pela rede a cada epoch. Não pode ser nem muito grande e nem muito pequena se não a rede não vai aprender.

In [18]:
modelo.fit(X_train, y_train,epochs=20,batch_size=10, verbose=True, validation_data=(X_test, y_test))

Epoch 1/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7995 - loss: 0.2204 - val_accuracy: 0.9839 - val_loss: 0.0421
Epoch 2/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9872 - loss: 0.0342 - val_accuracy: 0.9862 - val_loss: 0.0128
Epoch 3/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9936 - loss: 0.0119 - val_accuracy: 0.9856 - val_loss: 0.0119
Epoch 4/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9970 - loss: 0.0059 - val_accuracy: 0.9844 - val_loss: 0.0119
Epoch 5/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9970 - loss: 0.0060 - val_accuracy: 0.9844 - val_loss: 0.0121
Epoch 6/20
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9978 - loss: 0.0046 - val_accuracy: 0.9850 - val_loss: 0.0121
Epoch 7/20
[1m390/390[0m 

<keras.src.callbacks.history.History at 0x206360fe570>

## Previsões e Métricas

In [19]:
loss, accuracy = modelo.evaluate(X_test,y_test)
print("Loss: ", loss)
print("Acurácia: ", accuracy)

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9844 - loss: 0.0130
Loss:  0.011709989979863167
Acurácia:  0.9856459498405457


In [20]:
nova_previsao = modelo.predict(X_test)
print(nova_previsao)

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[1.00000000e+00]
 [2.62473004e-05]
 [2.09503385e-04]
 [1.00000000e+00]
 [3.90159170e-04]
 [9.20202613e-01]
 [2.57537104e-05]
 [7.60065019e-02]
 [1.62793417e-03]
 [9.21839732e-04]
 [2.96055921e-03]
 [1.93151715e-03]
 [5.51972538e-04]
 [9.99999583e-01]
 [1.30176765e-03]
 [4.80439659e-04]
 [1.01607548e-04]
 [1.00000000e+00]
 [7.35278353e-02]
 [2.07402190e-04]
 [1.04839928e-05]
 [4.89964987e-05]
 [4.87380719e-04]
 [4.67058544e-06]
 [1.00000000e+00]
 [1.07295731e-04]
 [1.56338399e-04]
 [1.81966345e-03]
 [4.11824658e-05]
 [1.55997102e-03]
 [2.66141462e-04]
 [4.65319317e-06]
 [1.29480730e-04]
 [8.08225104e-06]
 [5.77181345e-04]
 [7.99258778e-05]
 [1.90840499e-03]
 [4.02555145e-07]
 [2.18167179e-03]
 [1.72112938e-02]
 [1.17519587e-04]
 [1.47612020e-03]
 [1.64529518e-03]
 [1.13649010e-04]
 [8.41984467e-04]
 [1.06610780e-04]
 [4.05772182e-04]
 [1.00000000e+00]
 [1.29605993e-04]
 [1.31013185e-05]
 [2.44104955e-03]
 [2.57664

In [21]:
prev = (nova_previsao > 0.5)
print(prev)

[[ True]
 [False]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 

In [22]:
cm = confusion_matrix(y_test, prev)
print(cm)

[[1431    4]
 [  20  217]]
