# Construyendo una red neuronal con Keras

## Librerías

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from IPython.display import SVG

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from tqdm import tqdm  # https://github.com/tqdm/tqdm

import keras
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential
from keras.utils.vis_utils import model_to_dot
from keras import optimizers, regularizers

Using TensorFlow backend.


In [2]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats across runs.
# NOTE(review): only NumPy is seeded here; confirm whether the Keras/TensorFlow backend
# needs its own seed for training to be fully reproducible.
np.random.seed(1)  # For Reproducibility

In [3]:
# Dataset loader imported from the `exercise_1` module.
from exercise_1 import load_dataset

# Unpack the four values returned by the loader: train/test inputs and their labels.
# NOTE: `y_test_orginal` (sic) is the spelling the later cells reference, so it is kept as-is.
X_train, X_test, y_train, y_test_orginal = load_dataset()

Training samples 1500, test_samples 500


In [4]:
# Measure every split first, then report the sizes (train block, separator, test block).
n_train_inputs, n_train_labels = len(X_train), len(y_train)
n_test_inputs, n_test_labels = len(X_test), len(y_test_orginal)

print("Len X_train         =", n_train_inputs)
print("Len y_train         =", n_train_labels)
print("==========================================")
print("Len X_test          =", n_test_inputs)
print("Len y_test_original =", n_test_labels)

Len X_train         = 1500
Len y_train         = 1500
Len X_test          = 500
Len y_test_original = 500


In [5]:
# Show the first training sample to see what the raw input looks like.
X_train[0]

b'here\'s a word analogy : amistad is to the lost world as schindler\'s list is to jurassic park . \nin 1993 , after steven spielberg made the monster dino hit , many critics described schindler\'s list as the director\'s " penance " ( as if there was a need for him to apologize for making a crowd-pleasing blockbuster ) . \nnow , after a three-year layoff , spielberg is back with a vengeance . \nonce again , his summer release was special effects-loaded action/adventure flick with dinosaurs munching on human appetizers . \nnow , following his 1993 pattern , he has fashioned another serious , inspirational christmas release about the nature of humanity . \nthat film is amistad . \nalthough not as masterful as schindler\'s list , amistad is nevertheless a gripping motion picture . \nthematically rich , impeccably crafted , and intellectually stimulating , the only area where this movie falls a little short is in its emotional impact . \nwatching schindler\'s list was a powerful , almost 

In [6]:
# Show the training labels.
y_train

array([1, 0, 0, ..., 0, 1, 1])

In [7]:
# Pool the train and test inputs (despite its name, `train_x_y` holds no labels) and
# collect every distinct whitespace-separated token that appears in any of them.
train_x_y = X_train + X_test
vocabulary = {
    token
    for review in train_x_y
    for token in review.split()
}

In [8]:
# Number of distinct entries collected in `vocabulary`.
len(vocabulary)

50920

In [9]:
from collections import Counter

# Tally how many samples carry each label, per split, to check the class balance.
train_label_counts = Counter(y_train)
test_label_counts = Counter(y_test_orginal)

print("Counter y_train         =", train_label_counts)
print("Counter y_test_original =", test_label_counts)

Counter y_train         = Counter({1: 757, 0: 743})
Counter y_test_original = Counter({0: 257, 1: 243})


### Usaremos como Vectorizador: TFIDF

In [10]:
# Best vectorizer parameters (the ones used in the Supervised Learning exercise).
best_params_vect = dict(
    analyzer='word',
    binary=True,
    lowercase=True,
    max_df=0.3,
    min_df=4,
    ngram_range=(1, 2),
    stop_words=None,
    strip_accents=None,
    tokenizer=None,
)

# Build the TF-IDF vectorizer from those settings; the bare name below displays its repr.
vect = TfidfVectorizer(**best_params_vect)
vect

TfidfVectorizer(analyzer='word', binary=True, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=0.3, max_features=None, min_df=4,
        ngram_range=(1, 2), norm='l2', preprocessor=None, smooth_idf=True,
        stop_words=None, strip_accents=None, sublinear_tf=False,
        token_pattern='(?u)\\b\\w\\w+\\b', tokenizer=None, use_idf=True,
        vocabulary=None)

In [11]:
# Fit the vectorizer on the training inputs, then convert the result with `.toarray()`.
train_tfidf = vect.fit_transform(X_train)
X_train_vec = train_tfidf.toarray()
display(X_train_vec)

# Summarize the container type and dimensions of the training matrix.
train_vec_type, train_vec_shape = type(X_train_vec), X_train_vec.shape
print("\n")
print("Type X_train_vec =", train_vec_type)
print("Shape X_train_vec =", train_vec_shape)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])



Type X_train_vec = <class 'numpy.ndarray'>
Shape X_train_vec = (1500, 44922)


In [12]:
# Transform the test inputs with the already-fitted vectorizer (no re-fitting here),
# then convert the result with `.toarray()`.
test_tfidf = vect.transform(X_test)
X_test_vec = test_tfidf.toarray()
display(X_test_vec)

# Summarize the container type and dimensions of the test matrix.
test_vec_type, test_vec_shape = type(X_test_vec), X_test_vec.shape
print("\n")
print("Type X_test_vec =", test_vec_type)
print("Shape X_test_vec =", test_vec_shape)

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.06059365, 0.        , ..., 0.        , 0.        ,
        0.        ]])



Type X_test_vec = <class 'numpy.ndarray'>
Shape X_test_vec = (500, 44922)


Default Parameters LinearSVC
----------------------------
* penalty='l2',
* loss='squared_hinge',
* dual=True,
* tol=0.0001,
* C=1.0,
* multi_class='ovr',
* fit_intercept=True,
* intercept_scaling=1,
* class_weight=None,
* verbose=0,
* random_state=None,
* max_iter=1000

In [13]:
# Training hyper-parameters.
BATCH_SIZE = 128
NUM_CLASSES = 2
EPOCHS = 10

# Dataset dimensions, read from the vectorized matrices instead of being hard-coded.
# The feature count depends on the data and on the vectorizer settings (min_df, max_df,
# n-gram range), so literal values silently go stale whenever either one changes.
TRAIN_EXAMPLES, INPUT_SIZE = X_train_vec.shape
TEST_EXAMPLES = X_test_vec.shape[0]

In [14]:
# Give both feature matrices the explicit 2-D layout (n_samples, INPUT_SIZE).
# The reshape raises if the element counts do not match the configured dimensions.
train_shape = (TRAIN_EXAMPLES, INPUT_SIZE)
test_shape = (TEST_EXAMPLES, INPUT_SIZE)

x_train_vec = np.reshape(X_train_vec, train_shape)
x_test_vec = np.reshape(X_test_vec, test_shape)

In [15]:
# Binary classifier: one hidden ReLU layer of 250 units over INPUT_SIZE-dimensional
# inputs, followed by a single output unit. Both layers apply L2 weight regularization.
model = Sequential([
    Dense(250,
          input_shape=(INPUT_SIZE,),
          activation='relu',
          kernel_regularizer=regularizers.l2(0.01),
    ),
    # A single output unit must use a sigmoid. Softmax normalizes across the units of
    # the layer, so with only one unit it always outputs exactly 1.0: the network would
    # predict the same class for every input and receive no useful gradient.
    Dense(1,
          activation='sigmoid',
          kernel_regularizer=regularizers.l2(0.01),
    ),
])
# Binary cross-entropy pairs with the sigmoid output (probability of the positive class).
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [16]:
# Print the layer-by-layer summary of `model` (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 250)               11230750  
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 251       
Total params: 11,231,001
Trainable params: 11,231,001
Non-trainable params: 0
_________________________________________________________________


In [17]:
# NOTE(review): the test split is passed as validation data, so the per-epoch validation
# metrics are computed on the same samples later treated as the test set — confirm this
# is intended.
validation_pair = (x_test_vec, y_test_orginal)

# Train for EPOCHS passes in mini-batches of BATCH_SIZE; `history` keeps the return
# value of `fit` for later inspection.
history = model.fit(
    x=x_train_vec,
    y=y_train,
    validation_data=validation_pair,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

Train on 1500 samples, validate on 500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
