# Convolutional Neural Network

## Model architecture
![CNN model diagram](https://cdn-images-1.medium.com/max/800/0*wigQtmJiv0bddwPI.)

## Sources

 - [Medium article](https://towardsdatascience.com/understanding-how-convolutional-neural-network-cnn-perform-text-classification-with-word-d2ee64b9dd0b)
 - [Paper: Convolutional Neural Networks for Sentence Classification (Kim, 2014)](http://www.aclweb.org/anthology/D14-1181)

## Data

In [2]:
import keras
import numpy as np
from src.datasetAPI import RotaDosConcursos
from gensim.models import KeyedVectors
from nltk.tokenize import word_tokenize

Using TensorFlow backend.


In [3]:
# Model hyper-parameters shared by the cells below.
number = 100            # steps (rows) per input sample
dimensions = 50         # features per step; also selects the cbow_s<dimensions> embedding file
categories_count = 50   # units of the final Dense layer

# Symbolic input tensor: one (number, dimensions) matrix per sample.
input_layer = keras.Input(shape=(number, dimensions))

In [4]:
# Load both dataset splits; the train split is constructed first, then test.
train_data, test_data = (
    RotaDosConcursos(subset=split) for split in ('train', 'test')
)

# Number of category names exposed by the training split.
train_categories_len = len(train_data.target_names)

In [None]:
# Embedding file is chosen by the vector size configured in `dimensions`.
word_embed_path = 'dataset/word2vec/cbow_s%d.txt' % (dimensions,)

# Load the vectors in word2vec format; undecodable bytes in the file are
# ignored instead of aborting the load.
word_embed_model = KeyedVectors.load_word2vec_format(
    word_embed_path, unicode_errors="ignore"
)

In [None]:
# Turn every training document into a list of word vectors.
#
# Out-of-vocabulary tokens are skipped one by one. Previously a single unknown
# token raised KeyError for the whole comprehension and the entire document
# was silently discarded, with no record of which documents survived.
setence_vectors = []
# Positions (within train_data.clean_text) of the documents that were kept,
# so labels can later be selected with the same indices.
setence_indices = []
for index, setence in enumerate(train_data.clean_text):
    try:
        tokens = word_tokenize(setence.lower())
    except AttributeError:
        # Entry has no .lower() (not a string): show it and move on.
        print(setence)
        continue

    vectors = [word_embed_model[word] for word in tokens if word in word_embed_model]
    if not vectors:
        # Nothing embeddable; an empty entry cannot be padded into a
        # (steps, dimensions) matrix later on, so leave it out.
        continue

    setence_vectors.append(vectors)
    setence_indices.append(index)

## Normalization

In [None]:
# Length (number of word vectors) of the longest embedded document.
max_setence_size = max(map(len, setence_vectors))

In [None]:
# Bring every document to exactly `number` rows so it matches the model's
# Input shape (number, dimensions): shorter documents are zero-padded at the
# end, longer ones are truncated. Padding to the longest document instead
# would only fit the model if that length happened to equal `number`.
normalized_input = np.zeros((len(setence_vectors), number, dimensions))
for row, setence in enumerate(setence_vectors):
    kept = setence[:number]
    if kept:  # an empty document stays all zeros
        normalized_input[row, :len(kept)] = kept

## Model

In [None]:
def _relu_filter(kernel_size):
    """Apply a fresh single-filter ReLU Conv1D of the given width to input_layer."""
    conv = keras.layers.Conv1D(filters=1, kernel_size=kernel_size, activation='relu')
    return conv(input_layer)


# Two independent filters for each window width (4, 3 and 2 steps).
filter_layer_4_1 = _relu_filter(4)
filter_layer_4_2 = _relu_filter(4)

filter_layer_3_1 = _relu_filter(3)
filter_layer_3_2 = _relu_filter(3)

filter_layer_2_1 = _relu_filter(2)
filter_layer_2_2 = _relu_filter(2)

In [None]:
# Pool sizes of number - k + 1 for a width-k filter; with Conv1D's default
# 'valid' padding that is the full length of the filter output, so each
# pooling layer keeps a single maximum per filter.
pool_size_4 = number - 4 + 1
pool_size_3 = number - 3 + 1
pool_size_2 = number - 2 + 1

max_layer_4_1 = keras.layers.MaxPooling1D(pool_size=pool_size_4)(filter_layer_4_1)
max_layer_4_2 = keras.layers.MaxPooling1D(pool_size=pool_size_4)(filter_layer_4_2)

max_layer_3_1 = keras.layers.MaxPooling1D(pool_size=pool_size_3)(filter_layer_3_1)
max_layer_3_2 = keras.layers.MaxPooling1D(pool_size=pool_size_3)(filter_layer_3_2)

max_layer_2_1 = keras.layers.MaxPooling1D(pool_size=pool_size_2)(filter_layer_2_1)
max_layer_2_2 = keras.layers.MaxPooling1D(pool_size=pool_size_2)(filter_layer_2_2)

In [None]:
# Pooled outputs in a fixed order: widths 4, 3, 2, two filters each.
pooled_features = [
    max_layer_4_1,
    max_layer_4_2,
    max_layer_3_1,
    max_layer_3_2,
    max_layer_2_1,
    max_layer_2_2,
]

# Join the six pooled tensors along axis 1.
concat1max = keras.layers.Concatenate(axis=1)(pooled_features)

In [None]:
# concat1max stacks six pooled tensors along axis 1, so it is 3-D. Dense only
# transforms the last axis, which would give one categories_count-sized vector
# per pooled feature instead of one per sample. Flatten first so each sample
# becomes a single feature vector.
flat_features = keras.layers.Flatten()(concat1max)

# Softmax turns the scores into a probability distribution, which is what the
# 'categorical_crossentropy' loss used at compile time expects by default.
output_layer = keras.layers.Dense(categories_count, activation='softmax')(flat_features)

In [None]:
# Functional-API model mapping the input tensor to the output tensor.
model = keras.Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
)

In [None]:
# NOTE(review): no targets are passed here and the model is never fit in the
# visible cells, so this call has no labels to score against — confirm the
# intended labels and add a training step.
model.evaluate(x=np.array(normalized_input))