<a href="https://colab.research.google.com/github/jgermanob/JobOffersClassifier/blob/master/Notebooks/Split/LSTM_trainable_split.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Acceso a Google Drive

In [None]:
# Colab-only step: attach the user's Google Drive to the runtime file system.
# The data, tokenizer and report paths used further down all live under
# '/content/drive/My Drive/...', so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Bibliotecas necesarias

In [None]:
!pip3 install pickle5

Collecting pickle5
[?25l  Downloading https://files.pythonhosted.org/packages/f7/4c/5c4dd0462c8d3a6bc4af500a6af240763c2ebd1efdc736fc2c946d44b70a/pickle5-0.0.11.tar.gz (132kB)
[K     |██▌                             | 10kB 19.3MB/s eta 0:00:01[K     |█████                           | 20kB 24.7MB/s eta 0:00:01[K     |███████▍                        | 30kB 19.3MB/s eta 0:00:01[K     |██████████                      | 40kB 6.9MB/s eta 0:00:01[K     |████████████▍                   | 51kB 8.4MB/s eta 0:00:01[K     |██████████████▉                 | 61kB 9.7MB/s eta 0:00:01[K     |█████████████████▍              | 71kB 10.0MB/s eta 0:00:01[K     |███████████████████▉            | 81kB 11.0MB/s eta 0:00:01[K     |██████████████████████▎         | 92kB 11.1MB/s eta 0:00:01[K     |████████████████████████▉       | 102kB 8.9MB/s eta 0:00:01[K     |███████████████████████████▎    | 112kB 8.9MB/s eta 0:00:01[K     |█████████████████████████████▊  | 122kB 8.9MB/s eta 0:00:0

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import to_categorical
from gensim.models import KeyedVectors
import tensorflow.keras.backend as K
import numpy as np
import io
import datetime
import pickle5 as pickle
from sklearn.metrics import classification_report

## Definición del modelo

In [None]:
class LSTM:
  """Text classifier: trainable embedding -> spatial dropout -> LSTM -> softmax.

  The embedding layer is initialised from `embedding_matrix` and fine-tuned
  during training (`trainable=True`). The compiled Keras model is exposed as
  `self.model`.

  Args:
    input_dim: Vocabulary size. The embedding table is built with
      `input_dim + 1` rows, so `embedding_matrix` must have that many rows
      (presumably row 0 is the padding index -- confirm against the tokenizer).
    embedding_matrix: Initial embedding weights, `(input_dim + 1, output_dim)`.
    input_length: Number of token ids per input sequence.
    output_dim: Width of each embedding vector.
    num_classes: Number of units in the softmax output layer.
    lstm_units: Number of units in the LSTM layer.
    dropout_rate: Rate passed to `SpatialDropout1D` on the embedding output.
  """

  def __init__(self, input_dim, embedding_matrix, input_length=300,
               output_dim=300, num_classes=23, lstm_units=100, dropout_rate=0.2):
    # Named `token_ids` rather than `input` so the builtin is not shadowed.
    token_ids = Input(shape=(input_length,), name='input_1')
    embeddings = layers.Embedding(
        input_dim + 1,
        output_dim,
        input_length=input_length,
        weights=[embedding_matrix],
        trainable=True,
    )
    lstm = layers.LSTM(lstm_units, name='lstm_layer')

    embedding_output = embeddings(token_ids)
    embedding_output = layers.SpatialDropout1D(dropout_rate)(embedding_output)
    lstm_output = lstm(embedding_output)

    predictions = layers.Dense(num_classes, activation='softmax', name='dense_layer')(lstm_output)
    self.model = Model([token_ids], predictions)
    self.__compile()
    #plot_model(self.model, to_file='/content/drive/My Drive/dicode/Backtranslation/lstm_model.png', show_shapes=True, show_dtype=True)
    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    self.model.summary()

  def __compile(self):
    """Compile with categorical cross-entropy, the Adam optimizer and accuracy."""
    self.model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer='adam',
        metrics=['accuracy'],
    )

  def fit(self, input, targets, epochs=5, batch_size=128):
    """Train the model and return the Keras training history.

    Args:
      input: Training sequences, forwarded to the model as a single-input list.
      targets: Training labels, in the layout expected by
        `CategoricalCrossentropy` (the notebook passes one-hot rows).
      epochs: Maximum number of epochs.
      batch_size: Mini-batch size.

    Returns:
      Whatever `self.model.fit` returns (the history of the run), so callers
      can inspect or plot the per-epoch metrics.
    """
    # Monitors the *training* loss. With patience=20 the callback can only
    # stop a run that is configured for more than 20 epochs.
    early_stopping_monitor = EarlyStopping(monitor='loss', patience=20, min_delta=0.00001)
    callbacks = [early_stopping_monitor]
    history = self.model.fit([input], targets, epochs=epochs, batch_size=batch_size, callbacks=callbacks)
    return history

  def predict(self, input):
    """Return the model's output for `input` (softmax scores per class)."""
    return self.model.predict([input])

  def evaluate(self, input, targets, batch_size=128):
    """Return the result of `self.model.evaluate` on `input` / `targets`."""
    return self.model.evaluate([input], targets, batch_size=batch_size)


## Entrenamiento y evaluación

In [None]:
def load_data(path, key='arr_0'):
  """Load one array from a `.npz` archive.

  Args:
    path: Location of the `.npz` file.
    key: Name of the array inside the archive. Defaults to 'arr_0', the name
      `np.savez` gives to the first positional array.

  Returns:
    The array stored under `key`.
  """
  # np.load on a .npz returns a lazy archive that keeps the file open; the
  # context manager closes it once the requested array has been read.
  with np.load(path) as archive:
    return archive[key]

In [None]:
# All artefacts live under one Drive folder; keep the prefix in one place so
# the notebook can be pointed at another location with a single edit.
BASE_DIR = '/content/drive/My Drive/dicode/Backtranslation'
TRAIN_DIR = f'{BASE_DIR}/Split/Train'
TEST_DIR = f'{BASE_DIR}/Split/Test'
REPORT_PATH = f'{BASE_DIR}/Resultados/Split/classification_report_lstm_trainable.txt'

# --- Load the pre-processed split -------------------------------------------
x_train = load_data(f'{TRAIN_DIR}/x_train.npz')
y_train = load_data(f'{TRAIN_DIR}/y_train.npz')
print(x_train.shape)
print(y_train.shape)
x_test = load_data(f'{TEST_DIR}/x_test.npz')
y_test = load_data(f'{TEST_DIR}/y_test.npz')
embedding_matrix = load_data(f'{TRAIN_DIR}/embedding_matrix_train.npz')
# NOTE(security): unpickling executes arbitrary code from the file; only load
# tokenizer pickles produced by this project.
with open(f'{TRAIN_DIR}/tokenizer_train.pickle', 'rb') as handle:
  tokenizer = pickle.load(handle)

# --- Train and evaluate -----------------------------------------------------
# The model adds one extra embedding row on top of the vocabulary size.
input_dim = len(tokenizer.word_index)
lstm_model = LSTM(input_dim=input_dim, embedding_matrix=embedding_matrix)
lstm_model.fit(x_train, y_train, epochs=10)
lstm_model.evaluate(x_test, y_test)

# Collapse per-class scores / label rows to class indices for the report.
y_pred = lstm_model.predict(x_test)
y_predicted = np.argmax(y_pred, axis=-1)
y_true = np.argmax(y_test, axis=-1)
report = classification_report(y_true, y_predicted)
print(report)

# --- Persist the report -----------------------------------------------------
# `with` guarantees the file is flushed and closed even if the write fails.
with open(REPORT_PATH, 'w', encoding='utf8') as output_file:
  output_file.write(report)



(94286, 300)
(94286, 23)
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 300)]             0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 300)          43572000  
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 300, 300)          0         
_________________________________________________________________
lstm_layer (LSTM)            (None, 100)               160400    
_________________________________________________________________
dense_layer (Dense)          (None, 23)                2323      
Total params: 43,734,723
Trainable params: 43,734,723
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/

NameError: ignored