# Réseaux de neurones

Voici la liste des réseaux de neurones que nous allons implémenter :

1. DNN
2. LSTM
3. GRU
4. BRNN
5. RCNN

### Initialisation

In [1]:
import keras
from keras import models
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, LSTM, Bidirectional, Input, Reshape, GRU, Convolution1D, Flatten
from sklearn import metrics
from keras import backend as K
from sklearn.model_selection import train_test_split
import time

In [2]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D, AveragePooling1D
from keras.layers import GlobalMaxPooling1D
from keras.layers import GlobalAveragePooling1D
import pathlib
import joblib
from sklearn import svm
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
import time
import os
!pip install memory_profiler
import tracemalloc

Collecting memory_profiler
  Downloading memory_profiler-0.58.0.tar.gz (36 kB)
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py) ... [?25l[?25hdone
  Created wheel for memory-profiler: filename=memory_profiler-0.58.0-py3-none-any.whl size=30190 sha256=2b682f762ffb95693beea6d38278e67822f4e94b7a99340ca2e4336b9274dc30
  Stored in directory: /root/.cache/pip/wheels/56/19/d5/8cad06661aec65a04a0d6785b1a5ad035cb645b1772a4a0882
Successfully built memory-profiler
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.58.0


In [3]:
pip install pyyaml h5py



In [4]:
# Load the memory_profiler IPython extension (installed in the import cell above).
%load_ext memory_profiler

In [6]:
# Read the pre-processed corpus and discard the "Unnamed: 0" column
# (presumably an index column written out with the CSV - TODO confirm).
df = pd.read_csv("opinion_fact_news_pretraiter.csv").drop(columns=["Unnamed: 0"])

In [7]:
# Encode the textual classes as integers: "fact" -> 1, "opinion" -> 0.
for label_text, label_code in (("fact", 1), ("opinion", 0)):
    df.loc[df["label"] == label_text, "label"] = label_code
# The column still has its original dtype after the assignments; make it integer.
df['label'] = df['label'].astype('int')

In [8]:
# Raw article text (features) and the integer-encoded class (target).
X, ylabels = df['body'], df['label']

### BoW (Bag of Words)

In [9]:
import gensim
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer

# Run gensim's simple_preprocess on every document and join the resulting
# tokens back into one space-separated string per document.
X_clear = [' '.join(gensim.utils.simple_preprocess(x)) for x in X]

# Learn the vocabulary on the cleaned corpus (tokens of one or more word characters) ...
vectorizer = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}').fit(X_clear)

# ... then encode the same corpus as a document-term count matrix.
X_data_count = vectorizer.transform(X_clear)

### TF-IDF (Term Frequency-Inverse Document Frequency)

In [10]:
# Three TF-IDF representations of the cleaned corpus, each capped at 30000 features.
# fit_transform learns the vocabulary and encodes the corpus in a single pass,
# instead of tokenising every document twice with fit() followed by transform().

# Word-level terms.
tfidf_vect = TfidfVectorizer(analyzer='word', max_features=30000)
X_data_tfidf = tfidf_vect.fit_transform(X_clear)

# Word trigrams (ngram_range=(3, 3)).
tfidf_vect_ngram = TfidfVectorizer(analyzer='word', max_features=30000, ngram_range=(3, 3))
X_data_tfidf_ngram = tfidf_vect_ngram.fit_transform(X_clear)

# Character trigrams.
tfidf_vect_ngram_char = TfidfVectorizer(analyzer='char', max_features=30000, ngram_range=(3, 3))
X_data_tfidf_ngram_char = tfidf_vect_ngram_char.fit_transform(X_clear)

Après avoir implémenté TF-IDF, je remarque que la matrice obtenue est de très grande taille : la traiter de manière brute nécessiterait trop de temps et de mémoire.

On utilisera l'algorithme SVD (décomposition en valeurs singulières) pour réduire la dimension des données de la matrice que nous avons obtenue, tout en préservant les propriétés de la matrice d'origine.

In [11]:
from sklearn.decomposition import TruncatedSVD

# Reduce each TF-IDF matrix to 300 components. Every matrix gets its own
# decomposition, seeded with the same random_state for reproducibility.

# Word-level TF-IDF.
svd = TruncatedSVD(n_components=300, random_state=42).fit(X_data_tfidf)
X_data_tfidf_svd = svd.transform(X_data_tfidf)

# Word-trigram TF-IDF.
svd_ngram = TruncatedSVD(n_components=300, random_state=42).fit(X_data_tfidf_ngram)
X_data_tfidf_ngram_svd = svd_ngram.transform(X_data_tfidf_ngram)

# Character-trigram TF-IDF.
svd_ngram_char = TruncatedSVD(n_components=300, random_state=42).fit(X_data_tfidf_ngram_char)
X_data_tfidf_ngram_char_svd = svd_ngram_char.transform(X_data_tfidf_ngram_char)

## DNN

In [None]:
def evaluate_model_DNN(nom, X_data,Y_data,n_epochs, drop_Out):
  """Train and evaluate a fully connected binary classifier.

  The data is split 80/20 (stratified, random_state=42). The network is
  Dense(1024) -> Dropout -> Dense(1024) -> Dense(512) -> Dense(1, sigmoid),
  trained with Adam and binary cross-entropy. The trained model is saved to
  'save_model/DNN'.

  Args:
    nom: Label of the run; returned unchanged as the first result element.
    X_data: Feature matrix with 300 features per sample.
    Y_data: Binary labels aligned with X_data.
    n_epochs: Number of training epochs.
    drop_Out: Rate of the Dropout layer.

  Returns:
    list: [nom, n_epochs, drop_Out, precision, macro recall, macro F1,
    accuracy, training time (ms), prediction time (ms), peak memory traced by
    tracemalloc during one prediction pass (KiB), number of trainable
    parameters, size of the saved model on disk (bytes)].
  """
  X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42,stratify=Y_data)

  input_layer = Input(shape=(300,))
  layer = Dense(1024, activation='relu')(input_layer)
  layer = Dropout(drop_Out)(layer)
  layer = Dense(1024, activation='relu')(layer)
  layer = Dense(512, activation='relu')(layer)
  output_layer = Dense(1, activation='sigmoid')(layer)
  model = models.Model(input_layer, output_layer)

  # The training time covers compilation and fitting. The test split is only
  # passed as validation data for monitoring.
  start_time_train = time.time()
  model.compile("adam", loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=n_epochs, batch_size=512)
  time_train_val = round((time.time() - start_time_train)*1000)

  # First (untimed) prediction pass: yields the class predictions. Rounding the
  # sigmoid output gives 0/1; ravel() turns the (n, 1) column into a 1-D vector.
  y_pred = np.around(model.predict(X_test), 0).ravel()
  print("Accuracy: ", metrics.accuracy_score(Y_test, y_pred))

  # Second pass: the one whose duration is reported.
  start_time_test = time.time()
  model.predict(X_test)
  time_test_val = round((time.time() - start_time_test)*1000)

  # Third pass: memory measurement. Tracing is always stopped afterwards so it
  # neither slows down nor pollutes the measurements of later runs.
  tracemalloc.start()
  try:
    model.predict(X_test)
    _, peak_bytes = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()
  mem_test = round(peak_bytes/1024)

  # sklearn metrics expect (y_true, y_pred); with the arguments swapped,
  # precision and recall are exchanged.
  # NOTE(review): precision uses sklearn's default (binary) averaging while
  # recall and F1 are macro-averaged - kept as in the original, confirm intent.
  precision_val = round(metrics.precision_score(Y_test, y_pred), 4)
  recal_val = round(metrics.recall_score(Y_test, y_pred, average='macro'), 4)
  f1_val = round(metrics.f1_score(Y_test, y_pred, average='macro'), 4)
  accuracy_test_val = round(metrics.accuracy_score(Y_test, y_pred), 4)

  params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])

  export_dir="save_model/DNN"
  model.save(export_dir)
  # Reload once to check that the saved model can be read back.
  tf.keras.models.load_model(export_dir)

  # The model may be saved as a directory tree; os.stat() on a directory only
  # reports the size of the directory entry, so sum the files it contains.
  if os.path.isdir(export_dir):
    taille = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(export_dir)
        for name in names
    )
  else:
    taille = os.path.getsize(export_dir)

  return [nom,n_epochs,drop_Out,precision_val,recal_val,f1_val,accuracy_test_val,time_train_val, time_test_val,mem_test,params,taille]

In [None]:
scores = []
def run_experiment():
  """Train and evaluate the DNN once per dropout rate (0, 0.2, 0.5, 0.8).

  Every result row is written on its own line of 'resultat_DNN.txt', which is
  overwritten on each call.

  Returns:
    list: The module-level `scores` list, holding one result row per dropout rate.
  """
  # Start from an empty list so that calling the function again does not
  # append to the rows of a previous run.
  scores.clear()
  with open('resultat_DNN.txt', 'w') as f:
    for dropOut in [0, 0.2,0.5,0.8]:
      score= evaluate_model_DNN(nom='Deep_Neural_Network', X_data = X_data_tfidf_svd, Y_data = ylabels, n_epochs=20,drop_Out= dropOut)
      # One row per line; without the separator the rows run into each other.
      f.write("{0}\n".format(score))
      scores.append(score)
  return scores

In [23]:
# Run the DNN dropout sweep. run_experiment is redefined in every model section,
# so the cell directly above must have been executed last.
scores=run_experiment()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy:  0.9915
INFO:tensorflow:Assets written to: DNN/assets


## LSTM

In [None]:
def evaluate_model_LSTM(nom, X_data,Y_data,n_epochs, drop_Out):
  """Train and evaluate a stacked-LSTM binary classifier.

  The data is split 80/20 (stratified, random_state=42). The 300 input
  features are reshaped to a (10, 30) sequence and fed through
  LSTM(256, return_sequences) -> Dropout -> LSTM(128) -> Dense(512) ->
  Dense(512) -> Dense(128) -> Dense(1, sigmoid), trained with Adam and binary
  cross-entropy. The trained model is saved to 'save_model/LSTM'.

  Args:
    nom: Label of the run; returned unchanged as the first result element.
    X_data: Feature matrix with exactly 300 features per sample (required by the reshape).
    Y_data: Binary labels aligned with X_data.
    n_epochs: Number of training epochs.
    drop_Out: Rate of the Dropout layer.

  Returns:
    list: [nom, n_epochs, drop_Out, precision, macro recall, macro F1,
    accuracy, training time (ms), prediction time (ms), peak memory traced by
    tracemalloc during one prediction pass (KiB), number of trainable
    parameters, size of the saved model on disk (bytes)].
  """
  X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42,stratify=Y_data)

  input_layer = Input(shape=(300,))
  layer = Reshape((10, 30))(input_layer)
  layer = LSTM(256, activation='relu', return_sequences=True)(layer)
  layer = Dropout(drop_Out)(layer)
  layer = LSTM(128, activation='relu')(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(128, activation='relu')(layer)
  output_layer = Dense(1, activation='sigmoid')(layer)
  model = models.Model(input_layer, output_layer)

  # The training time covers compilation and fitting. The test split is only
  # passed as validation data for monitoring.
  start_time_train = time.time()
  model.compile("adam", loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=n_epochs, batch_size=512)
  time_train_val = round((time.time() - start_time_train)*1000)

  # First (untimed) prediction pass: yields the class predictions. Rounding the
  # sigmoid output gives 0/1; ravel() turns the (n, 1) column into a 1-D vector.
  y_pred = np.around(model.predict(X_test), 0).ravel()
  print("Accuracy: ", metrics.accuracy_score(Y_test, y_pred))

  # Second pass: the one whose duration is reported.
  start_time_test = time.time()
  model.predict(X_test)
  time_test_val = round((time.time() - start_time_test)*1000)

  # Third pass: memory measurement. Tracing is always stopped afterwards so it
  # neither slows down nor pollutes the measurements of later runs.
  tracemalloc.start()
  try:
    model.predict(X_test)
    _, peak_bytes = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()
  mem_test = round(peak_bytes/1024)

  # sklearn metrics expect (y_true, y_pred); with the arguments swapped,
  # precision and recall are exchanged.
  # NOTE(review): precision uses sklearn's default (binary) averaging while
  # recall and F1 are macro-averaged - kept as in the original, confirm intent.
  precision_val = round(metrics.precision_score(Y_test, y_pred), 4)
  recal_val = round(metrics.recall_score(Y_test, y_pred, average='macro'), 4)
  f1_val = round(metrics.f1_score(Y_test, y_pred, average='macro'), 4)
  accuracy_test_val = round(metrics.accuracy_score(Y_test, y_pred), 4)

  params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])

  # Dedicated directory: saving to 'save_model/DNN' overwrote the DNN model and
  # reported the size of the wrong directory.
  export_dir="save_model/LSTM"
  model.save(export_dir)
  # Reload once to check that the saved model can be read back.
  tf.keras.models.load_model(export_dir)

  # The model may be saved as a directory tree; os.stat() on a directory only
  # reports the size of the directory entry, so sum the files it contains.
  if os.path.isdir(export_dir):
    taille = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(export_dir)
        for name in names
    )
  else:
    taille = os.path.getsize(export_dir)

  return [nom,n_epochs,drop_Out,precision_val,recal_val,f1_val,accuracy_test_val,time_train_val, time_test_val,mem_test,params,taille]

In [None]:
scores = []
def run_experiment():
  """Train and evaluate the LSTM once per dropout rate (0, 0.2, 0.5, 0.8).

  Every result row is written on its own line of
  'save_model/resultat_LSTM.txt', which is overwritten on each call.

  Returns:
    list: The module-level `scores` list, holding one result row per dropout rate.
  """
  # Start from an empty list so that calling the function again does not
  # append to the rows of a previous run.
  scores.clear()
  # The output directory may not exist yet when this section is run on its own.
  os.makedirs('save_model', exist_ok=True)
  with open('save_model/resultat_LSTM.txt', 'w') as f:
    for dropOut in [0, 0.2,0.5,0.8]:
      score= evaluate_model_LSTM(nom='LSTM', X_data = X_data_tfidf_svd, Y_data = ylabels, n_epochs=20,drop_Out= dropOut)
      # One row per line; without the separator the rows run into each other.
      f.write("{0}\n".format(score))
      scores.append(score)
  return scores

In [None]:
# Run the LSTM dropout sweep. run_experiment is redefined in every model section,
# so the cell directly above must have been executed last.
scores=run_experiment()

## GRU

In [None]:
def evaluate_model_GRU(nom, X_data,Y_data,n_epochs, drop_Out):
  """Train and evaluate a GRU binary classifier.

  The data is split 80/20 (stratified, random_state=42). The 300 input
  features are reshaped to a (10, 30) sequence and fed through
  GRU(128) -> Dropout -> Dense(256) -> Dense(128) -> Dense(1, sigmoid),
  trained with Adam and binary cross-entropy. The trained model is saved to
  'save_model/GRU'.

  Args:
    nom: Label of the run; returned unchanged as the first result element.
    X_data: Feature matrix with exactly 300 features per sample (required by the reshape).
    Y_data: Binary labels aligned with X_data.
    n_epochs: Number of training epochs.
    drop_Out: Rate of the Dropout layer.

  Returns:
    list: [nom, n_epochs, drop_Out, precision, macro recall, macro F1,
    accuracy, training time (ms), prediction time (ms), peak memory traced by
    tracemalloc during one prediction pass (KiB), number of trainable
    parameters, size of the saved model on disk (bytes)].
  """
  X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42,stratify=Y_data)

  input_layer = Input(shape=(300,))
  layer = Reshape((10, 30))(input_layer)
  layer = GRU(128, activation='relu')(layer)
  layer = Dropout(drop_Out)(layer)
  layer = Dense(256, activation='relu')(layer)
  layer = Dense(128, activation='relu')(layer)
  output_layer = Dense(1, activation='sigmoid')(layer)
  model = models.Model(input_layer, output_layer)

  # The training time covers compilation and fitting. The test split is only
  # passed as validation data for monitoring.
  start_time_train = time.time()
  model.compile("adam", loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=n_epochs, batch_size=512)
  time_train_val = round((time.time() - start_time_train)*1000)

  # First (untimed) prediction pass: yields the class predictions. Rounding the
  # sigmoid output gives 0/1; ravel() turns the (n, 1) column into a 1-D vector.
  y_pred = np.around(model.predict(X_test), 0).ravel()
  print("Accuracy: ", metrics.accuracy_score(Y_test, y_pred))

  # Second pass: the one whose duration is reported.
  start_time_test = time.time()
  model.predict(X_test)
  time_test_val = round((time.time() - start_time_test)*1000)

  # Third pass: memory measurement. Tracing is always stopped afterwards so it
  # neither slows down nor pollutes the measurements of later runs.
  tracemalloc.start()
  try:
    model.predict(X_test)
    _, peak_bytes = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()
  mem_test = round(peak_bytes/1024)

  # sklearn metrics expect (y_true, y_pred); with the arguments swapped,
  # precision and recall are exchanged.
  # NOTE(review): precision uses sklearn's default (binary) averaging while
  # recall and F1 are macro-averaged - kept as in the original, confirm intent.
  precision_val = round(metrics.precision_score(Y_test, y_pred), 4)
  recal_val = round(metrics.recall_score(Y_test, y_pred, average='macro'), 4)
  f1_val = round(metrics.f1_score(Y_test, y_pred, average='macro'), 4)
  accuracy_test_val = round(metrics.accuracy_score(Y_test, y_pred), 4)

  params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])

  export_dir="save_model/GRU"
  model.save(export_dir)
  # Reload once to check that the saved model can be read back.
  tf.keras.models.load_model(export_dir)

  # The model may be saved as a directory tree; os.stat() on a directory only
  # reports the size of the directory entry, so sum the files it contains.
  if os.path.isdir(export_dir):
    taille = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(export_dir)
        for name in names
    )
  else:
    taille = os.path.getsize(export_dir)

  return [nom,n_epochs,drop_Out,precision_val,recal_val,f1_val,accuracy_test_val,time_train_val, time_test_val,mem_test,params,taille]

In [None]:
scores = []
def run_experiment():
  """Train and evaluate the GRU once per dropout rate (0, 0.2, 0.5, 0.8).

  Every result row is written on its own line of
  'save_model/resultat_GRU.txt', which is overwritten on each call.

  Returns:
    list: The module-level `scores` list, holding one result row per dropout rate.
  """
  # Start from an empty list so that calling the function again does not
  # append to the rows of a previous run.
  scores.clear()
  # The output directory may not exist yet when this section is run on its own.
  os.makedirs('save_model', exist_ok=True)
  with open('save_model/resultat_GRU.txt', 'w') as f:
    for dropOut in [0, 0.2,0.5,0.8]:
      score= evaluate_model_GRU(nom='GRU', X_data = X_data_tfidf_svd, Y_data = ylabels, n_epochs=20,drop_Out= dropOut)
      # One row per line; without the separator the rows run into each other.
      f.write("{0}\n".format(score))
      scores.append(score)
  return scores

In [None]:
# Run the GRU dropout sweep. run_experiment is redefined in every model section,
# so the cell directly above must have been executed last.
scores=run_experiment()

## Bidirectional RNN

In [None]:
def evaluate_model_BRNN(nom, X_data,Y_data,n_epochs, drop_Out):
  """Train and evaluate a bidirectional-GRU binary classifier.

  The data is split 80/20 (stratified, random_state=42). The 300 input
  features are reshaped to a (10, 30) sequence and fed through
  Bidirectional(GRU(128)) -> Dropout -> Dense(512) -> Dense(512) ->
  Dense(128) -> Dense(1, sigmoid), trained with Adam and binary
  cross-entropy. The trained model is saved to 'save_model/BRNN'.

  Args:
    nom: Label of the run; returned unchanged as the first result element.
    X_data: Feature matrix with exactly 300 features per sample (required by the reshape).
    Y_data: Binary labels aligned with X_data.
    n_epochs: Number of training epochs.
    drop_Out: Rate of the Dropout layer.

  Returns:
    list: [nom, n_epochs, drop_Out, precision, macro recall, macro F1,
    accuracy, training time (ms), prediction time (ms), peak memory traced by
    tracemalloc during one prediction pass (KiB), number of trainable
    parameters, size of the saved model on disk (bytes)].
  """
  X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42,stratify=Y_data)

  input_layer = Input(shape=(300,))
  layer = Reshape((10, 30))(input_layer)
  layer = Bidirectional(GRU(128, activation='relu'))(layer)
  layer = Dropout(drop_Out)(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(128, activation='relu')(layer)
  output_layer = Dense(1, activation='sigmoid')(layer)
  model = models.Model(input_layer, output_layer)

  # The training time covers compilation and fitting. The test split is only
  # passed as validation data for monitoring.
  start_time_train = time.time()
  model.compile("adam", loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=n_epochs, batch_size=512)
  time_train_val = round((time.time() - start_time_train)*1000)

  # First (untimed) prediction pass: yields the class predictions. Rounding the
  # sigmoid output gives 0/1; ravel() turns the (n, 1) column into a 1-D vector.
  y_pred = np.around(model.predict(X_test), 0).ravel()
  print("Accuracy: ", metrics.accuracy_score(Y_test, y_pred))

  # Second pass: the one whose duration is reported.
  start_time_test = time.time()
  model.predict(X_test)
  time_test_val = round((time.time() - start_time_test)*1000)

  # Third pass: memory measurement. Tracing is always stopped afterwards so it
  # neither slows down nor pollutes the measurements of later runs.
  tracemalloc.start()
  try:
    model.predict(X_test)
    _, peak_bytes = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()
  mem_test = round(peak_bytes/1024)

  # sklearn metrics expect (y_true, y_pred); with the arguments swapped,
  # precision and recall are exchanged.
  # NOTE(review): precision uses sklearn's default (binary) averaging while
  # recall and F1 are macro-averaged - kept as in the original, confirm intent.
  precision_val = round(metrics.precision_score(Y_test, y_pred), 4)
  recal_val = round(metrics.recall_score(Y_test, y_pred, average='macro'), 4)
  f1_val = round(metrics.f1_score(Y_test, y_pred, average='macro'), 4)
  accuracy_test_val = round(metrics.accuracy_score(Y_test, y_pred), 4)

  params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])

  export_dir="save_model/BRNN"
  model.save(export_dir)
  # Reload once to check that the saved model can be read back.
  tf.keras.models.load_model(export_dir)

  # The model may be saved as a directory tree; os.stat() on a directory only
  # reports the size of the directory entry, so sum the files it contains.
  if os.path.isdir(export_dir):
    taille = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(export_dir)
        for name in names
    )
  else:
    taille = os.path.getsize(export_dir)

  return [nom,n_epochs,drop_Out,precision_val,recal_val,f1_val,accuracy_test_val,time_train_val, time_test_val,mem_test,params,taille]

In [None]:
scores = []
def run_experiment():
  """Train and evaluate the bidirectional RNN once per dropout rate (0, 0.2, 0.5, 0.8).

  Every result row is written on its own line of
  'save_model/resultat_BRNN.txt', which is overwritten on each call.

  Returns:
    list: The module-level `scores` list, holding one result row per dropout rate.
  """
  # Start from an empty list so that calling the function again does not
  # append to the rows of a previous run.
  scores.clear()
  # The output directory may not exist yet when this section is run on its own.
  os.makedirs('save_model', exist_ok=True)
  with open('save_model/resultat_BRNN.txt', 'w') as f:
    for dropOut in [0, 0.2,0.5,0.8]:
      score= evaluate_model_BRNN(nom='BRNN', X_data = X_data_tfidf_svd, Y_data = ylabels, n_epochs=20,drop_Out= dropOut)
      # One row per line; without the separator the rows run into each other.
      f.write("{0}\n".format(score))
      scores.append(score)
  return scores

In [None]:
# Run the bidirectional-RNN dropout sweep. run_experiment is redefined in every
# model section, so the cell directly above must have been executed last.
scores=run_experiment()

## RCNN

In [None]:
def evaluate_model_RCNN(nom, X_data,Y_data,n_epochs, drop_Out):
  """Train and evaluate a 1-D convolutional binary classifier.

  The data is split 80/20 (stratified, random_state=42). The 300 input
  features are reshaped to a (10, 30) sequence and fed through
  Conv1D(128, kernel 7) -> Conv1D(128, kernel 3) -> Dropout -> Flatten ->
  Dense(512) -> Dense(512) -> Dense(128) -> Dense(1, sigmoid), trained with
  Adam and binary cross-entropy. The network contains no recurrent layer. The
  trained model is saved to 'save_model/RCNN'.

  Args:
    nom: Label of the run; returned unchanged as the first result element.
    X_data: Feature matrix with exactly 300 features per sample (required by the reshape).
    Y_data: Binary labels aligned with X_data.
    n_epochs: Number of training epochs.
    drop_Out: Rate of the Dropout layer.

  Returns:
    list: [nom, n_epochs, drop_Out, macro precision, macro recall, macro F1,
    accuracy, training time (ms), prediction time (ms), peak memory traced by
    tracemalloc during one prediction pass (KiB), number of trainable
    parameters, size of the saved model on disk (bytes)].
  """
  X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42,stratify=Y_data)

  input_layer = Input(shape=(300,))
  layer = Reshape((10, 30))(input_layer)
  layer = Convolution1D(128, 7, activation="relu")(layer)
  layer = Convolution1D(128, 3, activation="relu")(layer)
  layer = Dropout(drop_Out)(layer)
  layer = Flatten()(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(512, activation='relu')(layer)
  layer = Dense(128, activation='relu')(layer)
  output_layer = Dense(1, activation='sigmoid')(layer)
  model = models.Model(input_layer, output_layer)

  # The training time covers compilation and fitting. The test split is only
  # passed as validation data for monitoring.
  start_time_train = time.time()
  model.compile("adam", loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=n_epochs, batch_size=512)
  time_train_val = round((time.time() - start_time_train)*1000)

  # First (untimed) prediction pass: yields the class predictions. Rounding the
  # sigmoid output gives 0/1; ravel() turns the (n, 1) column into a 1-D vector.
  y_pred = np.around(model.predict(X_test), 0).ravel()
  print("Accuracy: ", metrics.accuracy_score(Y_test, y_pred))

  # Second pass: the one whose duration is reported.
  start_time_test = time.time()
  model.predict(X_test)
  time_test_val = round((time.time() - start_time_test)*1000)

  # Third pass: memory measurement. Tracing is always stopped afterwards so it
  # neither slows down nor pollutes the measurements of later runs.
  tracemalloc.start()
  try:
    model.predict(X_test)
    _, peak_bytes = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()
  mem_test = round(peak_bytes/1024)

  # sklearn metrics expect (y_true, y_pred); with the arguments swapped,
  # precision and recall are exchanged.
  precision_val = round(metrics.precision_score(Y_test, y_pred, average='macro'), 4)
  recal_val = round(metrics.recall_score(Y_test, y_pred, average='macro'), 4)
  f1_val = round(metrics.f1_score(Y_test, y_pred, average='macro'), 4)
  accuracy_test_val = round(metrics.accuracy_score(Y_test, y_pred), 4)

  params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])

  export_dir="save_model/RCNN"
  model.save(export_dir)
  # Reload once to check that the saved model can be read back.
  tf.keras.models.load_model(export_dir)

  # The model may be saved as a directory tree; os.stat() on a directory only
  # reports the size of the directory entry, so sum the files it contains.
  if os.path.isdir(export_dir):
    taille = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(export_dir)
        for name in names
    )
  else:
    taille = os.path.getsize(export_dir)

  return [nom,n_epochs,drop_Out,precision_val,recal_val,f1_val,accuracy_test_val,time_train_val, time_test_val,mem_test,params,taille]

In [None]:
scores = []
def run_experiment():
  """Train and evaluate the RCNN once per dropout rate (0, 0.2, 0.5, 0.8).

  Every result row is written on its own line of
  'save_model/resultat_RCNN.txt', which is overwritten on each call.

  Returns:
    list: The module-level `scores` list, holding one result row per dropout rate.
  """
  # Start from an empty list so that calling the function again does not
  # append to the rows of a previous run.
  scores.clear()
  # The output directory may not exist yet when this section is run on its own.
  os.makedirs('save_model', exist_ok=True)
  with open('save_model/resultat_RCNN.txt', 'w') as f:
    for dropOut in [0, 0.2,0.5,0.8]:
      score= evaluate_model_RCNN(nom='RCNN', X_data = X_data_tfidf_svd, Y_data = ylabels, n_epochs=20,drop_Out= dropOut)
      # One row per line; without the separator the rows run into each other.
      f.write("{0}\n".format(score))
      scores.append(score)
  return scores

In [None]:
# Run the RCNN dropout sweep. run_experiment is redefined in every model section,
# so the cell directly above must have been executed last.
scores=run_experiment()