In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import requests
import pandas as pd
import os
import numpy as np

In [0]:
# The team token is a credential. Prefer supplying it through the
# ERISK_TEAM_TOKEN environment variable so it does not have to live in the
# notebook; the literal is kept only as a backward-compatible fallback.
TEAM_TOKEN = os.environ.get('ERISK_TEAM_TOKEN', '1f4c93c4e114d6e72706c31901101db7vv')
# "getwritings" endpoints of the two tasks, authenticated by the team token.
SERVER_URL_TASK1 = 'http://erisk.irlab.org/challenge-t1e/getwritings/' + TEAM_TOKEN
SERVER_URL_TASK2 = 'http://erisk.irlab.org/challenge-t2/getwritings/' + TEAM_TOKEN

In [0]:
# Project folder, given relative to the working directory.
SST_HOME = 'drive/My Drive/Colab Notebooks/Erisk2019/'
# CSV files in which the writings received from the server are accumulated,
# one file per task.
PATH_SERVER_TASK1 = '{}datas/unoficcial server data/task1/test.csv'.format(SST_HOME)
PATH_SERVER_TASK2 = '{}datas/unoficcial server data/task2/test.csv'.format(SST_HOME)

In [0]:
def ask_server_df(url, timeout=60):
  """
  Perform a GET request to the server and return the received writings as a
  dataframe indexed by ('ID', 'DATE').

  :param url: The URL to request.
  :param timeout: Seconds to wait for the server before giving up, so that an
                  unresponsive server cannot block the notebook forever.

  :returns: The dataframe, or None if any problem occurs (network error,
            timeout, non-200 status, body that is not valid JSON, or a
            response without the expected "nick"/"date" fields).
  """
  try:
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
      return None
    # r.json() raises a ValueError subclass when the body is not valid JSON.
    df = pd.DataFrame(r.json())
  except (requests.RequestException, ValueError):
    return None

  df = df.rename(index=str, columns={"nick": "ID", "date": "DATE", "title":"TITLE", "redditor":"INFO", "content":"TEXT"})
  # An empty or unexpected response has no index columns; set_index would
  # raise a KeyError, so report it as "no data" instead.
  if 'ID' not in df.columns or 'DATE' not in df.columns:
    return None
  return df.set_index(['ID', 'DATE'])

In [0]:
df_t1 = ask_server_df(SERVER_URL_TASK1)
# df_t2 = ask_server_df(SERVER_URL_TASK2)

In [7]:
df_t1

Unnamed: 0_level_0,Unnamed: 1_level_0,TEXT,id,number,INFO,TITLE
ID,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
subject1001,2015-09-11T22:56:44.000+0000,got it,382143,395,365324,
subject1044,2016-04-13T07:55:01.000+0000,It did actually! Just jabjabjab!,320240,395,297535,
subject1074,2017-02-06T21:05:32.000+0000,Came here to say the same thing lol. The stup...,233012,395,224363,
subject1157,2014-03-19T17:24:03.000+0000,Please tell me there's a higher-resolution or...,465293,395,432096,
subject1164,2018-02-01T12:03:57.000+0000,I see people are more accepting of apple now,482997,395,468872,
subject1165,2016-04-12T11:32:54.000+0000,,477063,395,468868,NYTimes: Is the Worlds Best Croissant Made in...
subject1171,2018-01-10T02:52:47.000+0000,"Now, defense Jayhawks",239066,395,224370,
subject1174,2017-05-12T00:13:50.000+0000,Protip: if you want to search for something ...,475224,395,468864,
subject1183,2018-03-31T04:07:38.000+0000,What games did you end up trying?,508389,395,507953,
subject1208,2016-09-21T18:18:47.000+0000,It got Solar beam! :D,447,395,1,


In [0]:
def append_to_df(path, df, copy=True):
  """
  Append newly received data to an already existing csv.

  If the csv does not exist yet, ``df`` is simply written to ``path``.
  Otherwise the stored rows are read back (indexed by 'ID' and 'DATE'), placed
  after the rows of ``df``, fully duplicated rows are dropped and the result
  overwrites ``path``.

  :param path: Path of the csv file.
  :param df: Dataframe with the new data.
  :param copy: If True (default), the current content of the csv is first
               saved next to it with an "_old" suffix before the extension.
  """
  if not os.path.exists(path):
    df.to_csv(path)
    return

  df2 = pd.read_csv(path, index_col=['ID', 'DATE'])
  if copy:
    # splitext gives the same name as slicing off 4 characters for ".csv"
    # paths, and also works for any other extension length.
    root, ext = os.path.splitext(path)
    df2.to_csv(root + '_old' + ext)

  # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
  # Note that drop_duplicates compares the column values only (the ID/DATE
  # index is not taken into account).
  df = pd.concat([df, df2]).drop_duplicates()
  df.to_csv(path)

In [9]:
# Manual request used for debugging; the timeout keeps the cell from hanging
# indefinitely when the server does not answer.
r = requests.get(SERVER_URL_TASK1, timeout=60)

KeyboardInterrupt: ignored

In [0]:
# requests.Response has no `body` attribute; the decoded payload is in `text`.
# Only a prefix is shown to keep the cell output small.
r.text[:500]

In [0]:
df2 = pd.read_csv(PATH_SERVER_TASK1, index_col=['ID', 'DATE'])


In [0]:
len(df2), len(df_t1)

In [0]:
append_to_df(PATH_SERVER_TASK1, df_t1, copy=False)
# append_to_df(PATH_SERVER_TASK2, df_t2, copy=False)

Load and Ready Data
=================

In [0]:
records_df = pd.read_csv(PATH_SERVER_TASK1)
records_df = records_df.set_index(['ID', 'DATE'])
# Empty TEXT/TITLE cells are read back from the csv as NaN, and str + NaN is
# NaN, which would discard the other half of the writing and later break the
# string cleaning. Treat missing cells as empty strings instead.
records_df["TITE"] = records_df["TEXT"].fillna("") + records_df["TITLE"].fillna("")


# One list of writings per subject, in order of first appearance.
records_x_subject = [records_df.loc[subject]["TITE"].tolist()
                     for subject in records_df.index.get_level_values(0).unique()]

# Flat list of every writing, in the row order of records_df. Everything
# derived from this list is assigned back to records_df positionally, so it
# must follow the row order; the rows of the csv are not guaranteed to be
# grouped by subject (append_to_df puts each new batch in front of the older
# rows), so flattening the per-subject lists would misalign them.
records_x_simple = records_df["TITE"].tolist()

In [0]:
len(records_x_simple), len(records_x_subject)

Tokenize
========

In [0]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

stop_words=stopwords.words('english')
# Define maximum vocabulary length
MAX_WORDS = 5000

In [0]:
# We are using this function to clean the test set
def tokenize_clean_text(text, tfidf=True, tokenizer=None, max_length=None, max_words=MAX_WORDS):
  """
  Clean a list of texts and convert them into padded token sequences.

  Cleaning is applied in this order:
    1. "[removed]" is replaced by the special token "R3MOV3D".
    2. Words made only of digits are blanked out.
    3. Stop words and words of 3 characters or fewer are blanked out.
    4. If ``tfidf`` is True, words that are not in the vocabulary of a
       TfidfVectorizer fitted on the cleaned texts are blanked out.
  A blanked-out word is replaced by an empty string, so the cleaned text can
  contain consecutive spaces.

  :param text: The texts to process, as a list of strings.
  :param tfidf: Whether to apply step 4.
  :param tokenizer: An already fitted tokenizer to reuse. When None, a new
                    Tokenizer limited to ``max_words`` is fitted on the
                    cleaned texts.
  :param max_length: Value passed to pad_sequences as maximum length. When
                     None, the length of the longest tokenized text is used.
  :param max_words: Vocabulary size for the TF-IDF filter and for a newly
                    created tokenizer.

  :returns: A 4-tuple with the padded sequences, the tokenizer used, the
            padding length, and the cleaned texts.
  """
  # set [removed] as a special token
  text_removed = [t.replace("[removed]", "R3MOV3D") for t in text]

  # We remove the numbers
  cropped_numbers_text = [" ".join(word if not word.isdigit() else ""
                                   for word in sentence.split())
                          for sentence in text_removed]

  # Delete stopwords as well as every word of 3 chars or fewer.
  # A set makes each membership test O(1) instead of scanning the list.
  stop_set = set(stop_words)
  cropped_numbers_stopw_text = [" ".join(word if not (word in stop_set or len(word) <= 3) else ""
                                         for word in sentence.split())
                                for sentence in cropped_numbers_text]

  cleaned_text = cropped_numbers_stopw_text
  if tfidf:
    vec = TfidfVectorizer(max_features=max_words)
    # Only the vocabulary is needed, so the vectorizer is fitted without
    # materialising the (documents x max_words) matrix as a dense array.
    vec.fit(cropped_numbers_stopw_text)
    # get_feature_names was replaced by get_feature_names_out in newer
    # scikit-learn releases; support both.
    if hasattr(vec, "get_feature_names_out"):
      tfid_words = set(vec.get_feature_names_out())
    else:
      tfid_words = set(vec.get_feature_names())

    # NOTE(review): this lookup is case-sensitive while TfidfVectorizer
    # lower-cases by default, so words containing upper-case letters are
    # dropped here - confirm this matches how the models were trained.
    cleaned_text = [" ".join(word if word in tfid_words else ""
                             for word in sentence.split())
                    for sentence in cropped_numbers_stopw_text]

  if tokenizer is None:
    tokenizer = Tokenizer(num_words=max_words) # They use 5k words too
    tokenizer.fit_on_texts(cleaned_text)
  # We tokenize the sentences
  tokenized_text = tokenizer.texts_to_sequences(cleaned_text)

  if max_length is None:
    max_length = max((len(sentence) for sentence in tokenized_text), default=0)

  # Now we return the padded sequences.
  return pad_sequences(tokenized_text, max_length), tokenizer, max_length, cleaned_text
  

In [0]:
# load the tokenizer
import joblib
PATH_CHAR_TOKENIZER = SST_HOME + "DL/char_tokenizer.pkl"
PATH_TOKENIZER = SST_HOME + "DL/tokenizer.pkl"
tokenizer = joblib.load(PATH_TOKENIZER)
char_tokenizer = joblib.load(PATH_CHAR_TOKENIZER)

In [0]:
records_x_token, _, _, results_x_clean = tokenize_clean_text(records_x_simple, tokenizer=tokenizer)

In [0]:
# the token length will be set to 50
max_length = 50
records_x_token_crop = records_x_token[:,-max_length:]

In [0]:
records_df["TOKENIZED"] = records_x_token_crop.tolist()

In [0]:
# Split every cleaned text into a list of single characters.
results_x_char = [list(instance) for instance in results_x_clean]
# Number of characters kept per writing.
max_char_length = 400

# Left-pad the character lists that are too short with "." ...
results_x_char_padded = []
for chars in results_x_char:
  missing = max_char_length - len(chars)
  results_x_char_padded.append(["."] * missing + chars if missing > 0 else chars)
# ... and keep only the last max_char_length characters of the long ones.
results_x_char_crop = [chars[-max_char_length:] for chars in results_x_char_padded]

In [0]:
def tokenize_chars(text, tokenizer=None, max_length=max_char_length):
  """
  Convert character sequences into integer sequences.

  :param text: The character sequences to tokenize.
  :param tokenizer: Tokenizer to use. When None, a new Tokenizer is created
                    and fitted on ``text``.
  :param max_length: Minimum length of each returned sequence; shorter
                     sequences are left-padded with the value 1. Longer
                     sequences are returned unchanged.

  :returns: The list of integer sequences and the tokenizer used.
  """
  if tokenizer is None:
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(text)

  tokenized_text = tokenizer.texts_to_sequences(text)

  # Left-pad with 1 every sequence that came out shorter than max_length.
  for position in range(len(tokenized_text)):
    shortfall = max_length - len(tokenized_text[position])
    if shortfall > 0:
      tokenized_text[position] = [1] * shortfall + tokenized_text[position]

  return tokenized_text, tokenizer

In [0]:
results_x_char_crop_token, _ = tokenize_chars(results_x_char_crop, char_tokenizer)

In [0]:
records_df["TOK_CHAR"] = results_x_char_crop_token

Preprocess with A and D models
==========================

In [0]:
A_MODELS_PATH = SST_HOME + "DL/models/A/best_models/"
B_MODELS_PATH = SST_HOME + "DL/models/B/best_models/"
C_MODELS_PATH = SST_HOME + "DL/models/C/best_models/"
D_MODELS_PATH = SST_HOME + "DL/models/D/best_models/"
E_MODELS_PATH = SST_HOME + "DL/models/E/best_models/"


A_MODEL_PATH = A_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_300_trainable_emb_True_cnn_size_128_cnn_filter_3_rnn_size_None_cell_type_LSTM_bidirectional_False_attention_False_dropout_0.5_dnn_size_32_batch_size_1024"
B_MODEL_PATH = B_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_300_trainable_emb_True_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_GRU_bidirectional_True_attention_False_dropout_0.5_dnn_size_32_batch_size_None"
C_MODEL_PATH = C_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_None_trainable_emb_False_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_LSTM_bidirectional_True_attention_False_dropout_0.5_dnn_size_32_batch_size_1"
D_MODEL_PATH = D_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_50_trainable_emb_True_cnn_size_128_cnn_filter_10_rnn_size_None_cell_type_LSTM_bidirectional_False_attention_False_dropout_0.5_dnn_size_64_batch_size_1024"
E_MODEL_PATH = E_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_None_trainable_emb_False_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_GRU_bidirectional_False_attention_False_dropout_0.5_dnn_size_32_batch_size_1"

In [0]:
from keras.models import load_model
from keras.models import Sequential

In [0]:
a_model = load_model(A_MODEL_PATH)
# Build the feature extractor for model C from every layer of model A except
# the last two (the output layer and the last Dropout layer).
# The layers are sliced instead of calling a_model.layers.pop(): depending on
# the Keras version `layers` may return a copy of the internal list, in which
# case pop() silently removes nothing. Slicing also leaves a_model untouched.
c_pre_model = Sequential()

for layer in a_model.layers[:-2]:
  c_pre_model.add(layer)

In [0]:
d_model = load_model(D_MODEL_PATH)
# Build the feature extractor for model E from every layer of model D except
# the last two (the output layer and the last dropout layer).
# The layers are sliced instead of calling d_model.layers.pop(): depending on
# the Keras version `layers` may return a copy of the internal list, in which
# case pop() silently removes nothing. Slicing also leaves d_model untouched.
e_pre_model = Sequential()

for layer in d_model.layers[:-2]:
  e_pre_model.add(layer)


In [0]:
instances = np.array(records_df["TOKENIZED"].tolist())
char_instances = np.array(records_df["TOK_CHAR"].tolist())

In [0]:
embedded_text = c_pre_model.predict(instances)
embedded_chars = e_pre_model.predict(char_instances)

In [0]:
records_df["EMBEDDINGS"] = embedded_text.tolist()
records_df["CHAR_EMBED"] = embedded_chars.tolist()

In [0]:
records_subjects_stacked = []
records_subjects_embeddings = []
records_subjects_char_embeddings = []

for subject in records_df.index.get_level_values(0).unique():
  subject_rows = records_df.loc[subject]

  # B model: one sequence per subject made of the tokens of all of its
  # writings (padding zeros removed), cropped to the last 350 tokens and
  # left-padded with zeros up to 350.
  # Previously only the last writing of each subject reached the list because
  # the append happened after the inner loop, and the zeros were removed in
  # place from the lists stored in records_df. A new list is built here so the
  # dataframe is left untouched.
  # NOTE(review): confirm model B was trained on stacked writings.
  stacked = [token
             for writting in subject_rows["TOKENIZED"].tolist()
             for token in writting
             if token != 0]
  stacked = stacked[-350:]
  stacked = [0] * (350 - len(stacked)) + stacked
  records_subjects_stacked.append(stacked)

  # C and E models: the per-writing embeddings of the subject.
  records_subjects_embeddings.append(subject_rows["EMBEDDINGS"].tolist())
  records_subjects_char_embeddings.append(subject_rows["CHAR_EMBED"].tolist())

In [0]:
records_df["TOKENIZED"] = records_x_token_crop.tolist()

Load Models
===========

In [0]:
# load_models
a_model = load_model(A_MODEL_PATH)
b_model = load_model(B_MODEL_PATH)
c_model = load_model(C_MODEL_PATH)
d_model = load_model(D_MODEL_PATH)
e_model = load_model(E_MODEL_PATH)

In [0]:
e_model.summary()

In [0]:
# Create the per-model score columns, initially empty.
# (A stray "32" in front of the first statement made this cell a syntax error.)
records_df["AVALUE"] = np.nan
records_df["BVALUE"] = np.nan
records_df["CVALUE"] = np.nan
records_df["DVALUE"] = np.nan
records_df["EVALUE"] = np.nan

In [0]:
# Predictions

# A model
records_df["AVALUE"] = a_model.predict(np.array(records_df["TOKENIZED"].values.tolist()))
print("A done")

# B model
b_predictions = b_model.predict(np.array(records_subjects_stacked))
print("B done")

# C model
c_predictions = [c_model.predict(np.array([s_c])) for s_c in records_subjects_embeddings]
print("C done")

# D model
records_df["DVALUE"] = d_model.predict(np.array(records_df["TOK_CHAR"].values.tolist()))
print("D done")

# E model
e_predictions = [e_model.predict(np.array([s_e])) for s_e in records_subjects_char_embeddings]
print("E done")

Subjects results
=============

In [0]:
a_payload = []
b_payload = []
c_payload = []
d_payload = []
e_payload = []

for i, subject in enumerate(records_df.index.get_level_values(0).unique()):
  base_data = {"nick": subject, "decision": 0, "score": 0}

  # base_data is reused for the five models, so a copy must be appended each
  # time. Appending the dict itself made every payload hold the very same
  # object, i.e. all five runs ended up with the values of model E.
  # NOTE(review): the combined script at the end of this notebook uses 0.1 and
  # 0.9 as thresholds for B and C - TODO confirm which values are intended.

  # A model: mean of the per-writing scores of the subject
  base_data["score"] = np.mean(records_df.loc[subject]["AVALUE"].values).item()
  base_data["decision"] = 0 if base_data["score"] < 0.4 else 1
  a_payload.append(base_data.copy())

  # B model
  base_data["score"] = b_predictions[i][0].item()
  base_data["decision"] = 0 if base_data["score"] < 0.4 else 1
  b_payload.append(base_data.copy())

  # C model
  base_data["score"] = c_predictions[i][0].item()
  base_data["decision"] = 0 if base_data["score"] < 0.8 else 1
  c_payload.append(base_data.copy())

  # D model: mean of the per-writing scores of the subject
  base_data["score"] = np.mean(records_df.loc[subject]["DVALUE"].values).item()
  base_data["decision"] = 0 if base_data["score"] < 0.3 else 1
  d_payload.append(base_data.copy())

  # E model
  base_data["score"] = e_predictions[i][0].item()
  base_data["decision"] = 0 if base_data["score"] < 0.6 else 1
  e_payload.append(base_data.copy())

# One payload per run, in the order A, B, C, D, E.
predictions_list = [a_payload, b_payload, c_payload, d_payload, e_payload]

Post Results
===========

In [0]:
# Post each payload to the server; the position of the payload in
# predictions_list is used as the last component of the submit URL.
for i, predictions in enumerate(predictions_list):
  print(i)
  # NOTE(review): the writings are requested from ".../challenge-t1e/..." but
  # the results are posted to ".../challenge-t1/..." - confirm this is the
  # intended endpoint.
  post_url_challenge1 = "http://erisk.irlab.org/challenge-t1/submit/{}/{}".format(TEAM_TOKEN, i)
  r = requests.post(post_url_challenge1, json=predictions)
  
  # Show the HTTP status and the decoded JSON answer of the server.
  print(r.status_code)
  print(r.json())

ALL TOGETHER
=============

In [0]:
# -*- coding: utf-8 -*-
"""erisk_client.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1lIuI4CqwgWBkbuhT-eMU6bACiLvU5vCE
"""

from google.colab import drive
drive.mount('/content/drive')

# Number of writings found on the previous pass of the loop. It has to be
# initialised once, before the loop: when it was reset to 0 inside the loop
# body the "no new writings" check could never succeed and the loop never
# ended.
single_num_prev = 0

while True:
  import gc

  import requests
  import pandas as pd
  import os
  import numpy as np

  # The team token is a credential. Prefer supplying it through the
  # ERISK_TEAM_TOKEN environment variable; the literal is kept only as a
  # backward-compatible fallback.
  TEAM_TOKEN = os.environ.get('ERISK_TEAM_TOKEN', '1f4c93c4e114d6e72706c31901101db7vv')
  SERVER_URL_TASK1 = 'http://erisk.irlab.org/challenge-t1e/getwritings/' + TEAM_TOKEN
  SERVER_URL_TASK2 = 'http://erisk.irlab.org/challenge-t2/getwritings/' + TEAM_TOKEN

  SST_HOME='drive/My Drive/Colab Notebooks/Erisk2019/'
  PATH_SERVER_TASK1=SST_HOME+'datas/unoficcial server data/task1/test.csv'
  PATH_SERVER_TASK2=SST_HOME+'datas/unoficcial server data/task2/test.csv'


  def ask_server_df(url, timeout=60):
    """
    Perform a GET request to the server and return the received writings as a
    dataframe indexed by ('ID', 'DATE').

    :param url: The URL to request.
    :param timeout: Seconds to wait for the server before giving up, so that
                    an unresponsive server cannot block the loop forever.

    :returns: The dataframe, or None if any problem occurs (network error,
              timeout, non-200 status, body that is not valid JSON, or a
              response without the expected "nick"/"date" fields).
    """
    try:
      r = requests.get(url, timeout=timeout)
      if r.status_code != 200:
        return None
      # r.json() raises a ValueError subclass when the body is not valid JSON.
      df = pd.DataFrame(r.json())
    except (requests.RequestException, ValueError):
      return None

    df = df.rename(index=str, columns={"nick": "ID", "date": "DATE", "title":"TITLE", "redditor":"INFO", "content":"TEXT"})
    # An empty or unexpected response has no index columns; set_index would
    # raise a KeyError, so report it as "no data" instead.
    if 'ID' not in df.columns or 'DATE' not in df.columns:
      return None
    return df.set_index(['ID', 'DATE'])

  df_t1 = ask_server_df(SERVER_URL_TASK1)
  df_t2 = ask_server_df(SERVER_URL_TASK2)

  def append_to_df(path, df, copy=True):
    """
    Append newly received data to an already existing csv.

    If the csv does not exist yet, ``df`` is simply written to ``path``.
    Otherwise the stored rows are read back (indexed by 'ID' and 'DATE'),
    placed after the rows of ``df``, fully duplicated rows are dropped and the
    result overwrites ``path``.

    :param path: Path of the csv file.
    :param df: Dataframe with the new data.
    :param copy: If True (default), the current content of the csv is first
                 saved next to it with an "_old" suffix before the extension.
    """
    if not os.path.exists(path):
      df.to_csv(path)
      return

    df2 = pd.read_csv(path, index_col=['ID', 'DATE'])
    if copy:
      # splitext gives the same name as slicing off 4 characters for ".csv"
      # paths, and also works for any other extension length.
      root, ext = os.path.splitext(path)
      df2.to_csv(root + '_old' + ext)

    # DataFrame.append was removed in pandas 2.0; pd.concat is its
    # replacement. Note that drop_duplicates compares the column values only
    # (the ID/DATE index is not taken into account).
    df = pd.concat([df, df2]).drop_duplicates()
    df.to_csv(path)

  if not df_t1 is None:
    append_to_df(PATH_SERVER_TASK1, df_t1, copy=False)
    
  if not df_t2 is None:
    append_to_df(PATH_SERVER_TASK2, df_t2, copy=False)

  """Load and Ready Data
  =================
  """

  records_df = pd.read_csv(PATH_SERVER_TASK1)
  records_df = records_df.set_index(['ID', 'DATE'])
  # Empty TEXT/TITLE cells are read back from the csv as NaN, and str + NaN is
  # NaN, which would discard the other half of the writing and later break the
  # string cleaning. Treat missing cells as empty strings instead.
  records_df["TITE"] = records_df["TEXT"].fillna("") + records_df["TITLE"].fillna("")


  # One list of writings per subject, in order of first appearance.
  records_x_subject = [records_df.loc[subject]["TITE"].tolist()
                       for subject in records_df.index.get_level_values(0).unique()]

  # Flat list of every writing, in the row order of records_df. Everything
  # derived from this list is assigned back to records_df positionally, so it
  # must follow the row order; the rows of the csv are not guaranteed to be
  # grouped by subject (append_to_df puts each new batch in front of the older
  # rows), so flattening the per-subject lists would misalign them.
  records_x_simple = records_df["TITE"].tolist()

  print(len(records_x_simple), len(records_x_subject))
  
  if len(records_x_simple) == single_num_prev:
    break
  
  single_num_prev = len(records_x_simple)

  """Tokenize
  ========
  """

  from keras.preprocessing.text import Tokenizer
  from keras.preprocessing.sequence import pad_sequences
  from sklearn.feature_extraction.text import TfidfVectorizer

  import nltk
  from nltk.corpus import stopwords
  nltk.download('stopwords')

  stop_words=stopwords.words('english')
  # Define maximum vocabulary length
  MAX_WORDS = 5000

  # We are using this function to clean the test set
  def tokenize_clean_text(text, tfidf=True, tokenizer=None, max_length=None, max_words=MAX_WORDS):
    """
    Clean a list of texts and convert them into padded token sequences.

    Cleaning is applied in this order:
      1. "[removed]" is replaced by the special token "R3MOV3D".
      2. Words made only of digits are blanked out.
      3. Stop words and words of 3 characters or fewer are blanked out.
      4. If ``tfidf`` is True, words that are not in the vocabulary of a
         TfidfVectorizer fitted on the cleaned texts are blanked out.
    A blanked-out word is replaced by an empty string, so the cleaned text can
    contain consecutive spaces.

    :param text: The texts to process, as a list of strings.
    :param tfidf: Whether to apply step 4.
    :param tokenizer: An already fitted tokenizer to reuse. When None, a new
                      Tokenizer limited to ``max_words`` is fitted on the
                      cleaned texts.
    :param max_length: Value passed to pad_sequences as maximum length. When
                       None, the length of the longest tokenized text is used.
    :param max_words: Vocabulary size for the TF-IDF filter and for a newly
                      created tokenizer.

    :returns: A 4-tuple with the padded sequences, the tokenizer used, the
              padding length, and the cleaned texts.
    """
    # set [removed] as a special token
    text_removed = [t.replace("[removed]", "R3MOV3D") for t in text]

    # We remove the numbers
    cropped_numbers_text = [" ".join(word if not word.isdigit() else ""
                                     for word in sentence.split())
                            for sentence in text_removed]

    # Delete stopwords as well as every word of 3 chars or fewer.
    # A set makes each membership test O(1) instead of scanning the list.
    stop_set = set(stop_words)
    cropped_numbers_stopw_text = [" ".join(word if not (word in stop_set or len(word) <= 3) else ""
                                           for word in sentence.split())
                                  for sentence in cropped_numbers_text]

    cleaned_text = cropped_numbers_stopw_text
    if tfidf:
      vec = TfidfVectorizer(max_features=max_words)
      # Only the vocabulary is needed, so the vectorizer is fitted without
      # materialising the (documents x max_words) matrix as a dense array.
      vec.fit(cropped_numbers_stopw_text)
      # get_feature_names was replaced by get_feature_names_out in newer
      # scikit-learn releases; support both.
      if hasattr(vec, "get_feature_names_out"):
        tfid_words = set(vec.get_feature_names_out())
      else:
        tfid_words = set(vec.get_feature_names())

      # NOTE(review): this lookup is case-sensitive while TfidfVectorizer
      # lower-cases by default, so words containing upper-case letters are
      # dropped here - confirm this matches how the models were trained.
      cleaned_text = [" ".join(word if word in tfid_words else ""
                               for word in sentence.split())
                      for sentence in cropped_numbers_stopw_text]

    if tokenizer is None:
      tokenizer = Tokenizer(num_words=max_words) # They use 5k words too
      tokenizer.fit_on_texts(cleaned_text)
    # We tokenize the sentences
    tokenized_text = tokenizer.texts_to_sequences(cleaned_text)

    if max_length is None:
      max_length = max((len(sentence) for sentence in tokenized_text), default=0)

    # Now we return the padded sequences.
    return pad_sequences(tokenized_text, max_length), tokenizer, max_length, cleaned_text

  # load the tokenizer
  import joblib
  PATH_CHAR_TOKENIZER = SST_HOME + "DL/char_tokenizer.pkl"
  PATH_TOKENIZER = SST_HOME + "DL/tokenizer.pkl"
  tokenizer = joblib.load(PATH_TOKENIZER)
  char_tokenizer = joblib.load(PATH_CHAR_TOKENIZER)

  records_x_token, _, _, results_x_clean = tokenize_clean_text(records_x_simple, tokenizer=tokenizer)

  # the token length will be set to 50
  max_length = 50
  records_x_token_crop = records_x_token[:,-max_length:]

  records_df["TOKENIZED"] = records_x_token_crop.tolist()

  # Split every cleaned text into a list of single characters.
  results_x_char = [list(instance) for instance in results_x_clean]
  # Number of characters kept per writing.
  max_char_length = 400

  # Left-pad the character lists that are too short with "." ...
  results_x_char_padded = []
  for chars in results_x_char:
    missing = max_char_length - len(chars)
    results_x_char_padded.append(["."] * missing + chars if missing > 0 else chars)
  # ... and keep only the last max_char_length characters of the long ones.
  results_x_char_crop = [chars[-max_char_length:] for chars in results_x_char_padded]

  def tokenize_chars(text, tokenizer=None, max_length=max_char_length):
    """
    Convert character sequences into integer sequences.

    :param text: The character sequences to tokenize.
    :param tokenizer: Tokenizer to use. When None, a new Tokenizer is created
                      and fitted on ``text``.
    :param max_length: Minimum length of each returned sequence; shorter
                       sequences are left-padded with the value 1. Longer
                       sequences are returned unchanged.

    :returns: The list of integer sequences and the tokenizer used.
    """
    if tokenizer is None:
      tokenizer = Tokenizer()
      tokenizer.fit_on_texts(text)

    tokenized_text = tokenizer.texts_to_sequences(text)

    # Left-pad with 1 every sequence that came out shorter than max_length.
    for position in range(len(tokenized_text)):
      shortfall = max_length - len(tokenized_text[position])
      if shortfall > 0:
        tokenized_text[position] = [1] * shortfall + tokenized_text[position]

    return tokenized_text, tokenizer

  results_x_char_crop_token, _ = tokenize_chars(results_x_char_crop, char_tokenizer)

  records_df["TOK_CHAR"] = results_x_char_crop_token

  """Preprocess with A and D models
  ==========================
  """

  A_MODELS_PATH = SST_HOME + "DL/models/A/best_models/"
  B_MODELS_PATH = SST_HOME + "DL/models/B/best_models/"
  C_MODELS_PATH = SST_HOME + "DL/models/C/best_models/"
  D_MODELS_PATH = SST_HOME + "DL/models/D/best_models/"
  E_MODELS_PATH = SST_HOME + "DL/models/E/best_models/"


  A_MODEL_PATH = A_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_300_trainable_emb_True_cnn_size_128_cnn_filter_3_rnn_size_None_cell_type_LSTM_bidirectional_False_attention_False_dropout_0.5_dnn_size_32_batch_size_1024"
  B_MODEL_PATH = B_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_300_trainable_emb_True_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_GRU_bidirectional_True_attention_False_dropout_0.5_dnn_size_32_batch_size_None"
  C_MODEL_PATH = C_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_None_trainable_emb_False_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_LSTM_bidirectional_True_attention_False_dropout_0.5_dnn_size_32_batch_size_1"
  D_MODEL_PATH = D_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_50_trainable_emb_True_cnn_size_128_cnn_filter_10_rnn_size_None_cell_type_LSTM_bidirectional_False_attention_False_dropout_0.5_dnn_size_64_batch_size_1024"
  E_MODEL_PATH = E_MODELS_PATH + "Copia de load_emb_False_num_classes_1_emb_size_None_trainable_emb_False_cnn_size_None_cnn_filter_3_rnn_size_64_cell_type_GRU_bidirectional_False_attention_False_dropout_0.5_dnn_size_32_batch_size_1"

  from keras.models import load_model
  from keras.models import Sequential

  # The feature extractors for models C and E are built from every layer of
  # models A and D except the last two (the output layer and the last Dropout
  # layer). The layers are sliced instead of calling model.layers.pop():
  # depending on the Keras version `layers` may return a copy of the internal
  # list, in which case pop() silently removes nothing. Slicing also leaves
  # the loaded models untouched.
  a_model = load_model(A_MODEL_PATH)
  c_pre_model = Sequential()

  for layer in a_model.layers[:-2]:
    c_pre_model.add(layer)

  d_model = load_model(D_MODEL_PATH)
  e_pre_model = Sequential()

  for layer in d_model.layers[:-2]:
    e_pre_model.add(layer)

  instances = np.array(records_df["TOKENIZED"].tolist())
  char_instances = np.array(records_df["TOK_CHAR"].tolist())

  embedded_text = c_pre_model.predict(instances)
  embedded_chars = e_pre_model.predict(char_instances)

  records_df["EMBEDDINGS"] = embedded_text.tolist()
  records_df["CHAR_EMBED"] = embedded_chars.tolist()

  records_subjects_stacked = []
  records_subjects_embeddings = []
  records_subjects_char_embeddings = []

  for subject in records_df.index.get_level_values(0).unique():
    subject_rows = records_df.loc[subject]

    # B model: one sequence per subject made of the tokens of all of its
    # writings (padding zeros removed), cropped to the last 350 tokens and
    # left-padded with zeros up to 350.
    # Previously only the last writing of each subject reached the list
    # because the append happened after the inner loop, and the zeros were
    # removed in place from the lists stored in records_df. A new list is
    # built here so the dataframe is left untouched.
    # NOTE(review): confirm model B was trained on stacked writings.
    stacked = [token
               for writting in subject_rows["TOKENIZED"].tolist()
               for token in writting
               if token != 0]
    stacked = stacked[-350:]
    stacked = [0] * (350 - len(stacked)) + stacked
    records_subjects_stacked.append(stacked)

    # C and E models: the per-writing embeddings of the subject.
    records_subjects_embeddings.append(subject_rows["EMBEDDINGS"].tolist())
    records_subjects_char_embeddings.append(subject_rows["CHAR_EMBED"].tolist())

  records_df["TOKENIZED"] = records_x_token_crop.tolist()

  """Load Models
  ===========
  """

  # load_models
  a_model = load_model(A_MODEL_PATH)
  b_model = load_model(B_MODEL_PATH)
  c_model = load_model(C_MODEL_PATH)
  d_model = load_model(D_MODEL_PATH)
  e_model = load_model(E_MODEL_PATH)

  # Create the per-model score columns, initially empty. Only AVALUE and
  # DVALUE are filled in below; the B, C and E predictions are kept in
  # separate per-subject variables.
  records_df["AVALUE"] = np.nan
  records_df["BVALUE"] = np.nan
  records_df["CVALUE"] = np.nan
  records_df["DVALUE"] = np.nan
  records_df["EVALUE"] = np.nan

  # Predictions

  # A model: one prediction per writing, stored in the AVALUE column.
  records_df["AVALUE"] = a_model.predict(np.array(records_df["TOKENIZED"].values.tolist()))
  print("A done")

  # B model: one prediction per subject, from the stacked token sequences.
  b_predictions = b_model.predict(np.array(records_subjects_stacked))
  print("B done")

  # C model: predicted subject by subject (batch of one) on the list of
  # per-writing embeddings of each subject.
  c_predictions = [c_model.predict(np.array([s_c])) for s_c in records_subjects_embeddings]
  print("C done")

  # D model: one prediction per writing, stored in the DVALUE column.
  records_df["DVALUE"] = d_model.predict(np.array(records_df["TOK_CHAR"].values.tolist()))
  print("D done")

  # E model: predicted subject by subject (batch of one) on the list of
  # per-writing char embeddings of each subject.
  e_predictions = [e_model.predict(np.array([s_e])) for s_e in records_subjects_char_embeddings]
  print("E done")

  """Subjects results
  =============
  """

  a_payload, b_payload, c_payload, d_payload, e_payload = [], [], [], [], []

  for i, subject in enumerate(records_df.index.get_level_values(0).unique()):
    subject_rows = records_df.loc[subject]

    # (payload list, score of the subject, decision threshold) for each model.
    # A and D average their per-writing scores; B, C and E already produce one
    # score per subject.
    per_model = (
      (a_payload, np.mean(subject_rows["AVALUE"].values).item(), 0.4),
      (b_payload, b_predictions[i][0].item(), 0.1),
      (c_payload, c_predictions[i][0].item(), 0.9),
      (d_payload, np.mean(subject_rows["DVALUE"].values).item(), 0.3),
      (e_payload, e_predictions[i][0].item(), 0.6),
    )

    # The decision is 0 while the score stays below the threshold, 1 otherwise.
    for payload, score, threshold in per_model:
      payload.append({"nick": subject,
                      "decision": 0 if score < threshold else 1,
                      "score": score})

  # One payload per run, in the order A, B, C, D, E.
  predictions_list = [a_payload, b_payload, c_payload, d_payload, e_payload]

  """Post Results
  ===========
  """
  
  # Post each payload to the server and keep a local trace of what was sent.
  # The file is opened in "w" mode, so it only holds the payloads of the
  # current pass of the loop, written one after another without separator.
  with open(SST_HOME+"kiwi.txt", "w") as results_file:
    for i, predictions in enumerate(predictions_list):
      print(i)
      # The position of the payload in predictions_list is used as the last
      # component of the submit URL.
      post_url_challenge1 = "http://erisk.irlab.org/challenge-t1e/submit/{}/{}".format(TEAM_TOKEN, i)
      print(post_url_challenge1)
      results_file.write(str(predictions))
      
      r = requests.post(post_url_challenge1, json=predictions)

      # Show the HTTP status and the decoded JSON answer of the server.
      print(r.status_code)
      print(r.json())
          
  #%reset -f

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
149092 628
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
A done
B done
C done
D done
E done
0
http://erisk.irlab.org/challenge-t1e/submit/1f4c93c4e114d6e72706c31901101db7vv/0
200
[]
1
http://erisk.irlab.org/challenge-t1e/submit/1f4c93c4e114d6e72706c31901101db7vv/1
200
[]
2
http://erisk.irlab.org/challenge-t1e/submit/1f4c93c4e114d6e72706c31901101db7vv/2
200
[]
3
http://erisk.irlab.org/challenge-t1e/submit/1f4c93c4e114d6e72706c31901101db7vv/3
200
[]
4
http://erisk.irlab.org/challenge-t1e/submit/1f4c93c4e114d6e72706c31901101db7vv/4
200
[]
149092 628
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
