In [83]:
import torchtext.vocab as tv
import torch as torch
import torch.nn as nn
import numpy as np

In [84]:
if torch.cuda.is_available():  
    dev = "cuda:0" 
else:  
    dev = "cpu" 
device = torch.device(dev)

# 1. Préparation des données

In [97]:
# Fonction pour récupérer les données d'un fichier
# open_file(file : string) -> texts : [[string]], emotions : [string]
def open_file(file):
    texts = []
    emotions = []
    with open(file, newline = None) as f:
        for line in f:
            splited = line.split(";")
            phrases = splited[0].split() 
            emotions.append(splited[1][:-1]) # Permet de retirer le \n en fin de ligne
            texts.append(phrases)
    return texts, emotions

In [98]:
# Récupération des données 
train_texts, train_emotions = open_file("train.txt")

### Suppression des stop words

Pour la suppression des stop words, on se base sur la iste issue de https://github.com/kavgan/stop-words/blob/master/terrier-stop.txt. Certains mots ont du être supprimés de la liste de base puisqu'elle contenait par exemple le mot "good" qu'il semble cohérent de garder dans l'analyse de sentiments.

In [106]:
# Fonction pour ouvrir un fichier et en récupérer chaque ligne
def get_stop_words(file):
    words = []
    with open(file) as f:
        words = f.read().splitlines()
    return words

# Fonction pour supprimer les mots de la liste 'words' de liste de phrases 'phrases' (phrase est modifiée inplace)
def remove_words(phrases, emotions, words, verbose = True):
    removed_words = 0
    for i in range(len(words)): # pour chaque mot
        word = words[i]
        if(verbose):
            print(f"=== removing word {word} ({i+1}/{len(words)}) ===")
        for i in range(len(phrases)): # Pour chaque phrase
            for j in range(len(phrases[i])-1, -1 , -1): # Pour chaque mot de chaque phrase
                if(phrases[i][j] == word):
                    phrases[i].pop(j)
                    removed_words += 1
    emotions = [emotions[i] for i in range(len(emotions)) if len(phrases[i]) > 0]
    phrases = [phrase for phrase in phrases if len(phrase) > 0] # On supprime les phrases qui n'ont plus de mots :/
    if(verbose) : 
        print(f"Removed {removed_words} words")

In [107]:
stop_words = get_stop_words("stop_words.txt")
print(stop_words[:10])

['x', 'y', 'your', 'yours', 'yourself', 'yourselves', 'you', 'yond', 'yonder', 'yon']


In [108]:
remove_words(train_texts, train_emotions, stop_words)

=== removing word x (1/728) ===
=== removing word y (2/728) ===
=== removing word your (3/728) ===
=== removing word yours (4/728) ===
=== removing word yourself (5/728) ===
=== removing word yourselves (6/728) ===
=== removing word you (7/728) ===
=== removing word yond (8/728) ===
=== removing word yonder (9/728) ===
=== removing word yon (10/728) ===
=== removing word ye (11/728) ===
=== removing word yet (12/728) ===
=== removing word z (13/728) ===
=== removing word zillion (14/728) ===
=== removing word j (15/728) ===
=== removing word u (16/728) ===
=== removing word umpteen (17/728) ===
=== removing word usually (18/728) ===
=== removing word us (19/728) ===
=== removing word username (20/728) ===
=== removing word uponed (21/728) ===
=== removing word upons (22/728) ===
=== removing word uponing (23/728) ===
=== removing word upon (24/728) ===
=== removing word ups (25/728) ===
=== removing word upping (26/728) ===
=== removing word upped (27/728) ===
=== removing word up (28/

=== removing word abaftest (228/728) ===
=== removing word abovest (229/728) ===
=== removing word above (230/728) ===
=== removing word abover (231/728) ===
=== removing word abouter (232/728) ===
=== removing word aboutest (233/728) ===
=== removing word about (234/728) ===
=== removing word aid (235/728) ===
=== removing word amidst (236/728) ===
=== removing word amid (237/728) ===
=== removing word among (238/728) ===
=== removing word amongst (239/728) ===
=== removing word apartest (240/728) ===
=== removing word aparter (241/728) ===
=== removing word apart (242/728) ===
=== removing word appeared (243/728) ===
=== removing word appears (244/728) ===
=== removing word appear (245/728) ===
=== removing word appearing (246/728) ===
=== removing word appropriating (247/728) ===
=== removing word appropriate (248/728) ===
=== removing word appropriatest (249/728) ===
=== removing word appropriates (250/728) ===
=== removing word appropriater (251/728) ===
=== removing word appropri

=== removing word whereby (441/728) ===
=== removing word wherewithal (442/728) ===
=== removing word wherewith (443/728) ===
=== removing word whereinto (444/728) ===
=== removing word wherein (445/728) ===
=== removing word whereafter (446/728) ===
=== removing word whereas (447/728) ===
=== removing word wheresoever (448/728) ===
=== removing word wherefrom (449/728) ===
=== removing word which (450/728) ===
=== removing word whichever (451/728) ===
=== removing word whichsoever (452/728) ===
=== removing word whilst (453/728) ===
=== removing word while (454/728) ===
=== removing word whiles (455/728) ===
=== removing word whithersoever (456/728) ===
=== removing word whither (457/728) ===
=== removing word whoever (458/728) ===
=== removing word whosoever (459/728) ===
=== removing word whoso (460/728) ===
=== removing word whose (461/728) ===
=== removing word whomever (462/728) ===
=== removing word s (463/728) ===
=== removing word syne (464/728) ===
=== removing word syn (465/

=== removing word dos (657/728) ===
=== removing word dost (658/728) ===
=== removing word did (659/728) ===
=== removing word differentest (660/728) ===
=== removing word differenter (661/728) ===
=== removing word different (662/728) ===
=== removing word describing (663/728) ===
=== removing word describe (664/728) ===
=== removing word describes (665/728) ===
=== removing word described (666/728) ===
=== removing word despiting (667/728) ===
=== removing word despites (668/728) ===
=== removing word despited (669/728) ===
=== removing word despite (670/728) ===
=== removing word during (671/728) ===
=== removing word c (672/728) ===
=== removing word cum (673/728) ===
=== removing word circa (674/728) ===
=== removing word chez (675/728) ===
=== removing word cer (676/728) ===
=== removing word certain (677/728) ===
=== removing word certainest (678/728) ===
=== removing word certainer (679/728) ===
=== removing word cest (680/728) ===
=== removing word canst (681/728) ===
=== remo

In [110]:
print(train_texts[0]) # Le mot 'I' a été supprimé et aucune phrase ne se retrouve sans mot non plus
print(len(train_texts))

['didnt', 'feel', 'humiliated']
16000


### Filtrage des mots rares

On cherche ensuite à supprimer les mots qui n'apparaissent que très rarement dans le dataset. On fixe ici un seuil de rareté à au mois deux occurence pour ne plus être considéré comme rare

In [111]:
# Récupère les occurences de chaque mot sous la forme d'un dictionnaire
# @param phrases : [[string]] list des phrases des données
# @return word_occurences : {string : [(int, int)]} dictionnaire des occurences. En chaque mot on donne la liste
# des indices de chaque phrase et position d'où se trouve le mot (utilisé par la suite pour calculer les TF-IDF)
def get_words_occurences(phrases, verbose=True):
    allwords = [word for phrase in phrases for word in phrase]
    distinct_words = set(allwords)
    word_occurences = dict.fromkeys(distinct_words, None) # Cannot initiate to [] as it would yield a reference to the same array
    for i in range(len(phrases)):
        if(verbose):
            print(f"=== Traitement des occurences de la phrase {i+1}/{len(phrases)}")
        for j in range(len(phrases[i])):
            word = phrases[i][j]
            if(not word_occurences[word]): 
                word_occurences[word] = [(i,j)]
            else :
                word_occurences[word].append((i,j))
    return word_occurences

In [112]:
# Permet de supprimer les mots 'rares'
# @param threshold : int a partir de combien d'occurence d'un mot peut-on considérer qu'il n'est plus rare
def get_rarest_words(phrases, threshold):
    word_occurences = get_words_occurences(phrases)
    rarest_words = [word for word in word_occurences.keys() if len(word_occurences[word]) <= threshold]
    return rarest_words
rarest_words = get_rarest_words(train_texts, 2)

=== Traitement des occurences de la phrase 1/16000
=== Traitement des occurences de la phrase 2/16000
=== Traitement des occurences de la phrase 3/16000
=== Traitement des occurences de la phrase 4/16000
=== Traitement des occurences de la phrase 5/16000
=== Traitement des occurences de la phrase 6/16000
=== Traitement des occurences de la phrase 7/16000
=== Traitement des occurences de la phrase 8/16000
=== Traitement des occurences de la phrase 9/16000
=== Traitement des occurences de la phrase 10/16000
=== Traitement des occurences de la phrase 11/16000
=== Traitement des occurences de la phrase 12/16000
=== Traitement des occurences de la phrase 13/16000
=== Traitement des occurences de la phrase 14/16000
=== Traitement des occurences de la phrase 15/16000
=== Traitement des occurences de la phrase 16/16000
=== Traitement des occurences de la phrase 17/16000
=== Traitement des occurences de la phrase 18/16000
=== Traitement des occurences de la phrase 19/16000
=== Traitement des oc

=== Traitement des occurences de la phrase 2256/16000
=== Traitement des occurences de la phrase 2257/16000
=== Traitement des occurences de la phrase 2258/16000
=== Traitement des occurences de la phrase 2259/16000
=== Traitement des occurences de la phrase 2260/16000
=== Traitement des occurences de la phrase 2261/16000
=== Traitement des occurences de la phrase 2262/16000
=== Traitement des occurences de la phrase 2263/16000
=== Traitement des occurences de la phrase 2264/16000
=== Traitement des occurences de la phrase 2265/16000
=== Traitement des occurences de la phrase 2266/16000
=== Traitement des occurences de la phrase 2267/16000
=== Traitement des occurences de la phrase 2268/16000
=== Traitement des occurences de la phrase 2269/16000
=== Traitement des occurences de la phrase 2270/16000
=== Traitement des occurences de la phrase 2271/16000
=== Traitement des occurences de la phrase 2272/16000
=== Traitement des occurences de la phrase 2273/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 4585/16000
=== Traitement des occurences de la phrase 4586/16000
=== Traitement des occurences de la phrase 4587/16000
=== Traitement des occurences de la phrase 4588/16000
=== Traitement des occurences de la phrase 4589/16000
=== Traitement des occurences de la phrase 4590/16000
=== Traitement des occurences de la phrase 4591/16000
=== Traitement des occurences de la phrase 4592/16000
=== Traitement des occurences de la phrase 4593/16000
=== Traitement des occurences de la phrase 4594/16000
=== Traitement des occurences de la phrase 4595/16000
=== Traitement des occurences de la phrase 4596/16000
=== Traitement des occurences de la phrase 4597/16000
=== Traitement des occurences de la phrase 4598/16000
=== Traitement des occurences de la phrase 4599/16000
=== Traitement des occurences de la phrase 4600/16000
=== Traitement des occurences de la phrase 4601/16000
=== Traitement des occurences de la phrase 4602/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 6942/16000
=== Traitement des occurences de la phrase 6943/16000
=== Traitement des occurences de la phrase 6944/16000
=== Traitement des occurences de la phrase 6945/16000
=== Traitement des occurences de la phrase 6946/16000
=== Traitement des occurences de la phrase 6947/16000
=== Traitement des occurences de la phrase 6948/16000
=== Traitement des occurences de la phrase 6949/16000
=== Traitement des occurences de la phrase 6950/16000
=== Traitement des occurences de la phrase 6951/16000
=== Traitement des occurences de la phrase 6952/16000
=== Traitement des occurences de la phrase 6953/16000
=== Traitement des occurences de la phrase 6954/16000
=== Traitement des occurences de la phrase 6955/16000
=== Traitement des occurences de la phrase 6956/16000
=== Traitement des occurences de la phrase 6957/16000
=== Traitement des occurences de la phrase 6958/16000
=== Traitement des occurences de la phrase 6959/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 8423/16000
=== Traitement des occurences de la phrase 8424/16000
=== Traitement des occurences de la phrase 8425/16000
=== Traitement des occurences de la phrase 8426/16000
=== Traitement des occurences de la phrase 8427/16000
=== Traitement des occurences de la phrase 8428/16000
=== Traitement des occurences de la phrase 8429/16000
=== Traitement des occurences de la phrase 8430/16000
=== Traitement des occurences de la phrase 8431/16000
=== Traitement des occurences de la phrase 8432/16000
=== Traitement des occurences de la phrase 8433/16000
=== Traitement des occurences de la phrase 8434/16000
=== Traitement des occurences de la phrase 8435/16000
=== Traitement des occurences de la phrase 8436/16000
=== Traitement des occurences de la phrase 8437/16000
=== Traitement des occurences de la phrase 8438/16000
=== Traitement des occurences de la phrase 8439/16000
=== Traitement des occurences de la phrase 8440/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 10817/16000
=== Traitement des occurences de la phrase 10818/16000
=== Traitement des occurences de la phrase 10819/16000
=== Traitement des occurences de la phrase 10820/16000
=== Traitement des occurences de la phrase 10821/16000
=== Traitement des occurences de la phrase 10822/16000
=== Traitement des occurences de la phrase 10823/16000
=== Traitement des occurences de la phrase 10824/16000
=== Traitement des occurences de la phrase 10825/16000
=== Traitement des occurences de la phrase 10826/16000
=== Traitement des occurences de la phrase 10827/16000
=== Traitement des occurences de la phrase 10828/16000
=== Traitement des occurences de la phrase 10829/16000
=== Traitement des occurences de la phrase 10830/16000
=== Traitement des occurences de la phrase 10831/16000
=== Traitement des occurences de la phrase 10832/16000
=== Traitement des occurences de la phrase 10833/16000
=== Traitement des occurences de la phrase 10834/16000
=== Traite

=== Traitement des occurences de la phrase 13096/16000
=== Traitement des occurences de la phrase 13097/16000
=== Traitement des occurences de la phrase 13098/16000
=== Traitement des occurences de la phrase 13099/16000
=== Traitement des occurences de la phrase 13100/16000
=== Traitement des occurences de la phrase 13101/16000
=== Traitement des occurences de la phrase 13102/16000
=== Traitement des occurences de la phrase 13103/16000
=== Traitement des occurences de la phrase 13104/16000
=== Traitement des occurences de la phrase 13105/16000
=== Traitement des occurences de la phrase 13106/16000
=== Traitement des occurences de la phrase 13107/16000
=== Traitement des occurences de la phrase 13108/16000
=== Traitement des occurences de la phrase 13109/16000
=== Traitement des occurences de la phrase 13110/16000
=== Traitement des occurences de la phrase 13111/16000
=== Traitement des occurences de la phrase 13112/16000
=== Traitement des occurences de la phrase 13113/16000
=== Traite

=== Traitement des occurences de la phrase 15452/16000
=== Traitement des occurences de la phrase 15453/16000
=== Traitement des occurences de la phrase 15454/16000
=== Traitement des occurences de la phrase 15455/16000
=== Traitement des occurences de la phrase 15456/16000
=== Traitement des occurences de la phrase 15457/16000
=== Traitement des occurences de la phrase 15458/16000
=== Traitement des occurences de la phrase 15459/16000
=== Traitement des occurences de la phrase 15460/16000
=== Traitement des occurences de la phrase 15461/16000
=== Traitement des occurences de la phrase 15462/16000
=== Traitement des occurences de la phrase 15463/16000
=== Traitement des occurences de la phrase 15464/16000
=== Traitement des occurences de la phrase 15465/16000
=== Traitement des occurences de la phrase 15466/16000
=== Traitement des occurences de la phrase 15467/16000
=== Traitement des occurences de la phrase 15468/16000
=== Traitement des occurences de la phrase 15469/16000
=== Traite

In [113]:
# Prend un peu de temps
remove_words(train_texts, train_emotions, rarest_words)

=== removing word strands (1/9911) ===
=== removing word miniature (2/9911) ===
=== removing word battled (3/9911) ===
=== removing word hanford (4/9911) ===
=== removing word teeter (5/9911) ===
=== removing word proving (6/9911) ===
=== removing word beluga (7/9911) ===
=== removing word hook (8/9911) ===
=== removing word currency (9/9911) ===
=== removing word ownership (10/9911) ===
=== removing word jerome (11/9911) ===
=== removing word wallow (12/9911) ===
=== removing word albuquerque (13/9911) ===
=== removing word lupron (14/9911) ===
=== removing word flamingo (15/9911) ===
=== removing word ceo (16/9911) ===
=== removing word laiya (17/9911) ===
=== removing word realisation (18/9911) ===
=== removing word connecticut (19/9911) ===
=== removing word submission (20/9911) ===
=== removing word pleasantness (21/9911) ===
=== removing word advocates (22/9911) ===
=== removing word contacted (23/9911) ===
=== removing word unions (24/9911) ===
=== removing word uncontrollably (

=== removing word bestselling (211/9911) ===
=== removing word map (212/9911) ===
=== removing word deo (213/9911) ===
=== removing word opiates (214/9911) ===
=== removing word sections (215/9911) ===
=== removing word gary (216/9911) ===
=== removing word macabre (217/9911) ===
=== removing word ti (218/9911) ===
=== removing word khezef (219/9911) ===
=== removing word piercing (220/9911) ===
=== removing word ativan (221/9911) ===
=== removing word anansi (222/9911) ===
=== removing word casserole (223/9911) ===
=== removing word fist (224/9911) ===
=== removing word titled (225/9911) ===
=== removing word coaxed (226/9911) ===
=== removing word glycemic (227/9911) ===
=== removing word enveloped (228/9911) ===
=== removing word variants (229/9911) ===
=== removing word reciprocated (230/9911) ===
=== removing word canadians (231/9911) ===
=== removing word swelling (232/9911) ===
=== removing word barbeque (233/9911) ===
=== removing word intro (234/9911) ===
=== removing word lrt

=== removing word coach (416/9911) ===
=== removing word complications (417/9911) ===
=== removing word rum (418/9911) ===
=== removing word cleary (419/9911) ===
=== removing word kamen (420/9911) ===
=== removing word dustbin (421/9911) ===
=== removing word psychically (422/9911) ===
=== removing word basics (423/9911) ===
=== removing word georgia (424/9911) ===
=== removing word joel (425/9911) ===
=== removing word ritalin (426/9911) ===
=== removing word disgestive (427/9911) ===
=== removing word flowed (428/9911) ===
=== removing word anythings (429/9911) ===
=== removing word weeded (430/9911) ===
=== removing word fr (431/9911) ===
=== removing word refusal (432/9911) ===
=== removing word ep (433/9911) ===
=== removing word womens (434/9911) ===
=== removing word pursuit (435/9911) ===
=== removing word sulk (436/9911) ===
=== removing word undertone (437/9911) ===
=== removing word yang (438/9911) ===
=== removing word sportsline (439/9911) ===
=== removing word lingers (4

=== removing word smash (632/9911) ===
=== removing word liqueur (633/9911) ===
=== removing word abound (634/9911) ===
=== removing word unimaginable (635/9911) ===
=== removing word removing (636/9911) ===
=== removing word russell (637/9911) ===
=== removing word hummpffff (638/9911) ===
=== removing word earle (639/9911) ===
=== removing word timer (640/9911) ===
=== removing word baring (641/9911) ===
=== removing word bothersome (642/9911) ===
=== removing word batman (643/9911) ===
=== removing word dylan (644/9911) ===
=== removing word thaliad (645/9911) ===
=== removing word mumbai (646/9911) ===
=== removing word earley (647/9911) ===
=== removing word consulate (648/9911) ===
=== removing word occasions (649/9911) ===
=== removing word clint (650/9911) ===
=== removing word bang (651/9911) ===
=== removing word consolidate (652/9911) ===
=== removing word grievance (653/9911) ===
=== removing word perpetrator (654/9911) ===
=== removing word ping (655/9911) ===
=== removing

=== removing word caffeined (842/9911) ===
=== removing word prejudice (843/9911) ===
=== removing word rejuvenate (844/9911) ===
=== removing word paw (845/9911) ===
=== removing word dashboard (846/9911) ===
=== removing word essay (847/9911) ===
=== removing word panthers (848/9911) ===
=== removing word combing (849/9911) ===
=== removing word refers (850/9911) ===
=== removing word kerry (851/9911) ===
=== removing word forefoot (852/9911) ===
=== removing word navigate (853/9911) ===
=== removing word slough (854/9911) ===
=== removing word yhhhay (855/9911) ===
=== removing word abnormally (856/9911) ===
=== removing word sectret (857/9911) ===
=== removing word schindlers (858/9911) ===
=== removing word carapace (859/9911) ===
=== removing word vinegary (860/9911) ===
=== removing word melody (861/9911) ===
=== removing word geography (862/9911) ===
=== removing word acquaint (863/9911) ===
=== removing word gfathers (864/9911) ===
=== removing word clinique (865/9911) ===
===

=== removing word simplethoughtsonthings (1041/9911) ===
=== removing word whines (1042/9911) ===
=== removing word colognes (1043/9911) ===
=== removing word experts (1044/9911) ===
=== removing word pencils (1045/9911) ===
=== removing word tirades (1046/9911) ===
=== removing word quixote (1047/9911) ===
=== removing word rugby (1048/9911) ===
=== removing word lightning (1049/9911) ===
=== removing word peacefulness (1050/9911) ===
=== removing word pear (1051/9911) ===
=== removing word wip (1052/9911) ===
=== removing word deteriorated (1053/9911) ===
=== removing word insomnia (1054/9911) ===
=== removing word germans (1055/9911) ===
=== removing word fragile (1056/9911) ===
=== removing word slogging (1057/9911) ===
=== removing word sabrina (1058/9911) ===
=== removing word pillar (1059/9911) ===
=== removing word reliving (1060/9911) ===
=== removing word brokeup (1061/9911) ===
=== removing word mia (1062/9911) ===
=== removing word exit (1063/9911) ===
=== removing word pee

=== removing word inexcusable (1237/9911) ===
=== removing word preference (1238/9911) ===
=== removing word envying (1239/9911) ===
=== removing word substances (1240/9911) ===
=== removing word stumbled (1241/9911) ===
=== removing word fastforwarding (1242/9911) ===
=== removing word cantankerous (1243/9911) ===
=== removing word influences (1244/9911) ===
=== removing word enclose (1245/9911) ===
=== removing word whooped (1246/9911) ===
=== removing word blew (1247/9911) ===
=== removing word sor (1248/9911) ===
=== removing word sweatshirt (1249/9911) ===
=== removing word provoking (1250/9911) ===
=== removing word joked (1251/9911) ===
=== removing word pathological (1252/9911) ===
=== removing word mucus (1253/9911) ===
=== removing word progressively (1254/9911) ===
=== removing word egan (1255/9911) ===
=== removing word hone (1256/9911) ===
=== removing word florals (1257/9911) ===
=== removing word favourites (1258/9911) ===
=== removing word kyuhyun (1259/9911) ===
=== re

=== removing word jedi (1435/9911) ===
=== removing word poverty (1436/9911) ===
=== removing word bff (1437/9911) ===
=== removing word civilly (1438/9911) ===
=== removing word copious (1439/9911) ===
=== removing word sobered (1440/9911) ===
=== removing word macendarfer (1441/9911) ===
=== removing word electrified (1442/9911) ===
=== removing word plummeting (1443/9911) ===
=== removing word striding (1444/9911) ===
=== removing word fictional (1445/9911) ===
=== removing word restores (1446/9911) ===
=== removing word ordering (1447/9911) ===
=== removing word skipped (1448/9911) ===
=== removing word hiring (1449/9911) ===
=== removing word chalking (1450/9911) ===
=== removing word forks (1451/9911) ===
=== removing word seams (1452/9911) ===
=== removing word aimlessly (1453/9911) ===
=== removing word psychological (1454/9911) ===
=== removing word refrain (1455/9911) ===
=== removing word appriciation (1456/9911) ===
=== removing word dazs (1457/9911) ===
=== removing word r

=== removing word killer (1629/9911) ===
=== removing word asserting (1630/9911) ===
=== removing word orthodontist (1631/9911) ===
=== removing word naivety (1632/9911) ===
=== removing word clique (1633/9911) ===
=== removing word interference (1634/9911) ===
=== removing word aoi (1635/9911) ===
=== removing word intents (1636/9911) ===
=== removing word disgustingly (1637/9911) ===
=== removing word ember (1638/9911) ===
=== removing word floors (1639/9911) ===
=== removing word rereading (1640/9911) ===
=== removing word recollection (1641/9911) ===
=== removing word memorized (1642/9911) ===
=== removing word scratches (1643/9911) ===
=== removing word harrass (1644/9911) ===
=== removing word rufus (1645/9911) ===
=== removing word yoked (1646/9911) ===
=== removing word putter (1647/9911) ===
=== removing word overtook (1648/9911) ===
=== removing word alsways (1649/9911) ===
=== removing word creeping (1650/9911) ===
=== removing word briton (1651/9911) ===
=== removing word t

=== removing word elliptical (1821/9911) ===
=== removing word preached (1822/9911) ===
=== removing word unrest (1823/9911) ===
=== removing word affirmative (1824/9911) ===
=== removing word whiff (1825/9911) ===
=== removing word zooming (1826/9911) ===
=== removing word wanderlust (1827/9911) ===
=== removing word flirtiing (1828/9911) ===
=== removing word evey (1829/9911) ===
=== removing word thunk (1830/9911) ===
=== removing word flee (1831/9911) ===
=== removing word courtesy (1832/9911) ===
=== removing word crashed (1833/9911) ===
=== removing word fedotenko (1834/9911) ===
=== removing word superficial (1835/9911) ===
=== removing word assault (1836/9911) ===
=== removing word ashley (1837/9911) ===
=== removing word soldier (1838/9911) ===
=== removing word bloat (1839/9911) ===
=== removing word icy (1840/9911) ===
=== removing word slats (1841/9911) ===
=== removing word parted (1842/9911) ===
=== removing word thatll (1843/9911) ===
=== removing word dave (1844/9911) =

=== removing word wets (2031/9911) ===
=== removing word bouncer (2032/9911) ===
=== removing word burying (2033/9911) ===
=== removing word mug (2034/9911) ===
=== removing word tyres (2035/9911) ===
=== removing word frames (2036/9911) ===
=== removing word dork (2037/9911) ===
=== removing word motivating (2038/9911) ===
=== removing word mojo (2039/9911) ===
=== removing word webpage (2040/9911) ===
=== removing word photographing (2041/9911) ===
=== removing word tow (2042/9911) ===
=== removing word bouncing (2043/9911) ===
=== removing word trousers (2044/9911) ===
=== removing word reacts (2045/9911) ===
=== removing word traded (2046/9911) ===
=== removing word royally (2047/9911) ===
=== removing word brassed (2048/9911) ===
=== removing word pitiful (2049/9911) ===
=== removing word developers (2050/9911) ===
=== removing word correspond (2051/9911) ===
=== removing word crampy (2052/9911) ===
=== removing word ismaily (2053/9911) ===
=== removing word skid (2054/9911) ===
=

=== removing word terrribly (2238/9911) ===
=== removing word cycles (2239/9911) ===
=== removing word planting (2240/9911) ===
=== removing word psychologically (2241/9911) ===
=== removing word ashers (2242/9911) ===
=== removing word regarded (2243/9911) ===
=== removing word percentage (2244/9911) ===
=== removing word praising (2245/9911) ===
=== removing word boardwalk (2246/9911) ===
=== removing word reigns (2247/9911) ===
=== removing word manufacturing (2248/9911) ===
=== removing word giver (2249/9911) ===
=== removing word subscribers (2250/9911) ===
=== removing word chats (2251/9911) ===
=== removing word sword (2252/9911) ===
=== removing word bloated (2253/9911) ===
=== removing word adulthood (2254/9911) ===
=== removing word preservation (2255/9911) ===
=== removing word lolita (2256/9911) ===
=== removing word sacrificed (2257/9911) ===
=== removing word baba (2258/9911) ===
=== removing word fahad (2259/9911) ===
=== removing word ski (2260/9911) ===
=== removing wo

=== removing word sonipro (2444/9911) ===
=== removing word simon (2445/9911) ===
=== removing word drake (2446/9911) ===
=== removing word thatd (2447/9911) ===
=== removing word towel (2448/9911) ===
=== removing word needin (2449/9911) ===
=== removing word girly (2450/9911) ===
=== removing word sized (2451/9911) ===
=== removing word elitist (2452/9911) ===
=== removing word carelessness (2453/9911) ===
=== removing word cheat (2454/9911) ===
=== removing word aunty (2455/9911) ===
=== removing word atheists (2456/9911) ===
=== removing word tickles (2457/9911) ===
=== removing word carcenogenic (2458/9911) ===
=== removing word czech (2459/9911) ===
=== removing word rippling (2460/9911) ===
=== removing word minimize (2461/9911) ===
=== removing word venerate (2462/9911) ===
=== removing word sledding (2463/9911) ===
=== removing word missgivings (2464/9911) ===
=== removing word tropical (2465/9911) ===
=== removing word ethical (2466/9911) ===
=== removing word nearest (2467/9

=== removing word taxes (2637/9911) ===
=== removing word fighter (2638/9911) ===
=== removing word sic (2639/9911) ===
=== removing word backdrop (2640/9911) ===
=== removing word undeveloped (2641/9911) ===
=== removing word lethal (2642/9911) ===
=== removing word tornado (2643/9911) ===
=== removing word actuality (2644/9911) ===
=== removing word cooperative (2645/9911) ===
=== removing word sundaes (2646/9911) ===
=== removing word pawn (2647/9911) ===
=== removing word alba (2648/9911) ===
=== removing word stickiness (2649/9911) ===
=== removing word cosmos (2650/9911) ===
=== removing word clumps (2651/9911) ===
=== removing word guitars (2652/9911) ===
=== removing word soooooooooooo (2653/9911) ===
=== removing word cornelius (2654/9911) ===
=== removing word spy (2655/9911) ===
=== removing word arse (2656/9911) ===
=== removing word guessed (2657/9911) ===
=== removing word fateh (2658/9911) ===
=== removing word revision (2659/9911) ===
=== removing word stamp (2660/9911)

=== removing word waterside (2841/9911) ===
=== removing word beaubronz (2842/9911) ===
=== removing word humming (2843/9911) ===
=== removing word revealthestaryoutrulyare (2844/9911) ===
=== removing word istanbul (2845/9911) ===
=== removing word routes (2846/9911) ===
=== removing word pixies (2847/9911) ===
=== removing word burmeister (2848/9911) ===
=== removing word crunched (2849/9911) ===
=== removing word armed (2850/9911) ===
=== removing word regulated (2851/9911) ===
=== removing word patriotic (2852/9911) ===
=== removing word maks (2853/9911) ===
=== removing word digg (2854/9911) ===
=== removing word unexpectedly (2855/9911) ===
=== removing word dolls (2856/9911) ===
=== removing word wandered (2857/9911) ===
=== removing word directed (2858/9911) ===
=== removing word pining (2859/9911) ===
=== removing word sitcom (2860/9911) ===
=== removing word mating (2861/9911) ===
=== removing word retiring (2862/9911) ===
=== removing word fluffy (2863/9911) ===
=== removing

=== removing word pediatric (3090/9911) ===
=== removing word identifying (3091/9911) ===
=== removing word bankers (3092/9911) ===
=== removing word vibration (3093/9911) ===
=== removing word byte (3094/9911) ===
=== removing word sience (3095/9911) ===
=== removing word retrofitting (3096/9911) ===
=== removing word fortune (3097/9911) ===
=== removing word daley (3098/9911) ===
=== removing word cans (3099/9911) ===
=== removing word moose (3100/9911) ===
=== removing word usage (3101/9911) ===
=== removing word creatively (3102/9911) ===
=== removing word stereotypical (3103/9911) ===
=== removing word reputable (3104/9911) ===
=== removing word gertrude (3105/9911) ===
=== removing word standouts (3106/9911) ===
=== removing word fervor (3107/9911) ===
=== removing word transfer (3108/9911) ===
=== removing word gnawing (3109/9911) ===
=== removing word silhouette (3110/9911) ===
=== removing word shore (3111/9911) ===
=== removing word whatnot (3112/9911) ===
=== removing word h

=== removing word nissan (3286/9911) ===
=== removing word gawd (3287/9911) ===
=== removing word accompanied (3288/9911) ===
=== removing word hypocritical (3289/9911) ===
=== removing word altitudes (3290/9911) ===
=== removing word freind (3291/9911) ===
=== removing word possesses (3292/9911) ===
=== removing word lajoie (3293/9911) ===
=== removing word kinship (3294/9911) ===
=== removing word edinburgh (3295/9911) ===
=== removing word imaginary (3296/9911) ===
=== removing word atoshealthcare (3297/9911) ===
=== removing word peasy (3298/9911) ===
=== removing word unreality (3299/9911) ===
=== removing word valentino (3300/9911) ===
=== removing word renaissance (3301/9911) ===
=== removing word species (3302/9911) ===
=== removing word tightened (3303/9911) ===
=== removing word sensational (3304/9911) ===
=== removing word journals (3305/9911) ===
=== removing word solondz (3306/9911) ===
=== removing word shies (3307/9911) ===
=== removing word evans (3308/9911) ===
=== rem

=== removing word rivalry (3489/9911) ===
=== removing word flooded (3490/9911) ===
=== removing word weismans (3491/9911) ===
=== removing word sardonic (3492/9911) ===
=== removing word slips (3493/9911) ===
=== removing word unseat (3494/9911) ===
=== removing word motivate (3495/9911) ===
=== removing word overseas (3496/9911) ===
=== removing word ma (3497/9911) ===
=== removing word portable (3498/9911) ===
=== removing word pairing (3499/9911) ===
=== removing word lightmeter (3500/9911) ===
=== removing word alexander (3501/9911) ===
=== removing word lacy (3502/9911) ===
=== removing word coca (3503/9911) ===
=== removing word leslie (3504/9911) ===
=== removing word whales (3505/9911) ===
=== removing word asylum (3506/9911) ===
=== removing word gump (3507/9911) ===
=== removing word melodramatically (3508/9911) ===
=== removing word grievances (3509/9911) ===
=== removing word interlochen (3510/9911) ===
=== removing word jigsaw (3511/9911) ===
=== removing word requirement

=== removing word resolution (3689/9911) ===
=== removing word indigestion (3690/9911) ===
=== removing word scents (3691/9911) ===
=== removing word cert (3692/9911) ===
=== removing word colbert (3693/9911) ===
=== removing word bubbles (3694/9911) ===
=== removing word todo (3695/9911) ===
=== removing word reaches (3696/9911) ===
=== removing word ade (3697/9911) ===
=== removing word expansive (3698/9911) ===
=== removing word limply (3699/9911) ===
=== removing word overweight (3700/9911) ===
=== removing word drinkin (3701/9911) ===
=== removing word rampage (3702/9911) ===
=== removing word fulfil (3703/9911) ===
=== removing word sublimed (3704/9911) ===
=== removing word asserted (3705/9911) ===
=== removing word couples (3706/9911) ===
=== removing word bumper (3707/9911) ===
=== removing word hyenas (3708/9911) ===
=== removing word construes (3709/9911) ===
=== removing word quieter (3710/9911) ===
=== removing word weave (3711/9911) ===
=== removing word charred (3712/991

=== removing word hindrance (3895/9911) ===
=== removing word evoked (3896/9911) ===
=== removing word horrors (3897/9911) ===
=== removing word vj (3898/9911) ===
=== removing word forehead (3899/9911) ===
=== removing word heater (3900/9911) ===
=== removing word roasting (3901/9911) ===
=== removing word influencing (3902/9911) ===
=== removing word dunstable (3903/9911) ===
=== removing word reset (3904/9911) ===
=== removing word marvellous (3905/9911) ===
=== removing word disregard (3906/9911) ===
=== removing word abstinence (3907/9911) ===
=== removing word disastrous (3908/9911) ===
=== removing word denali (3909/9911) ===
=== removing word damon (3910/9911) ===
=== removing word warned (3911/9911) ===
=== removing word synonym (3912/9911) ===
=== removing word roman (3913/9911) ===
=== removing word morris (3914/9911) ===
=== removing word taewuhbeoryeo (3915/9911) ===
=== removing word rlsh (3916/9911) ===
=== removing word poetic (3917/9911) ===
=== removing word immigrati

=== removing word portland (4107/9911) ===
=== removing word faris (4108/9911) ===
=== removing word uplifter (4109/9911) ===
=== removing word wintry (4110/9911) ===
=== removing word fade (4111/9911) ===
=== removing word drags (4112/9911) ===
=== removing word lands (4113/9911) ===
=== removing word attach (4114/9911) ===
=== removing word darkest (4115/9911) ===
=== removing word wanatribe (4116/9911) ===
=== removing word flatmate (4117/9911) ===
=== removing word ample (4118/9911) ===
=== removing word embaressed (4119/9911) ===
=== removing word ideals (4120/9911) ===
=== removing word repeated (4121/9911) ===
=== removing word heated (4122/9911) ===
=== removing word uninspired (4123/9911) ===
=== removing word mcpherson (4124/9911) ===
=== removing word unattainable (4125/9911) ===
=== removing word seriouly (4126/9911) ===
=== removing word bombing (4127/9911) ===
=== removing word jtwoo (4128/9911) ===
=== removing word omangy (4129/9911) ===
=== removing word wealth (4130/9

=== removing word pilings (4315/9911) ===
=== removing word ewan (4316/9911) ===
=== removing word irresistable (4317/9911) ===
=== removing word groan (4318/9911) ===
=== removing word empathise (4319/9911) ===
=== removing word demise (4320/9911) ===
=== removing word legion (4321/9911) ===
=== removing word tho (4322/9911) ===
=== removing word tody (4323/9911) ===
=== removing word shoplifting (4324/9911) ===
=== removing word gene (4325/9911) ===
=== removing word espouse (4326/9911) ===
=== removing word rigid (4327/9911) ===
=== removing word overthink (4328/9911) ===
=== removing word schultz (4329/9911) ===
=== removing word worldly (4330/9911) ===
=== removing word righteous (4331/9911) ===
=== removing word affend (4332/9911) ===
=== removing word cracks (4333/9911) ===
=== removing word haunts (4334/9911) ===
=== removing word variations (4335/9911) ===
=== removing word resurrect (4336/9911) ===
=== removing word insight (4337/9911) ===
=== removing word asasoulawakens (43

=== removing word winehouse (4511/9911) ===
=== removing word milonga (4512/9911) ===
=== removing word buffalo (4513/9911) ===
=== removing word canadian (4514/9911) ===
=== removing word ight (4515/9911) ===
=== removing word shermin (4516/9911) ===
=== removing word canaglia (4517/9911) ===
=== removing word frequency (4518/9911) ===
=== removing word newborns (4519/9911) ===
=== removing word dormire (4520/9911) ===
=== removing word crutches (4521/9911) ===
=== removing word mehow (4522/9911) ===
=== removing word defenses (4523/9911) ===
=== removing word drift (4524/9911) ===
=== removing word xx (4525/9911) ===
=== removing word rob (4526/9911) ===
=== removing word sachaying (4527/9911) ===
=== removing word dubstep (4528/9911) ===
=== removing word weeding (4529/9911) ===
=== removing word keepmeinstitchez (4530/9911) ===
=== removing word veryy (4531/9911) ===
=== removing word nuptials (4532/9911) ===
=== removing word entrenchedly (4533/9911) ===
=== removing word phoenix 

=== removing word lecture (4715/9911) ===
=== removing word crawling (4716/9911) ===
=== removing word workings (4717/9911) ===
=== removing word itsy (4718/9911) ===
=== removing word setups (4719/9911) ===
=== removing word bellmen (4720/9911) ===
=== removing word organ (4721/9911) ===
=== removing word gather (4722/9911) ===
=== removing word movign (4723/9911) ===
=== removing word gen (4724/9911) ===
=== removing word blowout (4725/9911) ===
=== removing word noticeably (4726/9911) ===
=== removing word ardmore (4727/9911) ===
=== removing word glowy (4728/9911) ===
=== removing word blueberry (4729/9911) ===
=== removing word bombers (4730/9911) ===
=== removing word velde (4731/9911) ===
=== removing word vague (4732/9911) ===
=== removing word aggression (4733/9911) ===
=== removing word extinguished (4734/9911) ===
=== removing word recommended (4735/9911) ===
=== removing word tiphany (4736/9911) ===
=== removing word perceptions (4737/9911) ===
=== removing word fetish (473

=== removing word heartdesire (4921/9911) ===
=== removing word replacement (4922/9911) ===
=== removing word headspace (4923/9911) ===
=== removing word resolutions (4924/9911) ===
=== removing word obstacle (4925/9911) ===
=== removing word behaves (4926/9911) ===
=== removing word walt (4927/9911) ===
=== removing word toshibalol (4928/9911) ===
=== removing word partnership (4929/9911) ===
=== removing word preferred (4930/9911) ===
=== removing word reinforcement (4931/9911) ===
=== removing word shortcomings (4932/9911) ===
=== removing word dopey (4933/9911) ===
=== removing word illicits (4934/9911) ===
=== removing word dsl (4935/9911) ===
=== removing word angled (4936/9911) ===
=== removing word wholesale (4937/9911) ===
=== removing word burrowed (4938/9911) ===
=== removing word kremlin (4939/9911) ===
=== removing word congratulation (4940/9911) ===
=== removing word bsc (4941/9911) ===
=== removing word nearby (4942/9911) ===
=== removing word snuggle (4943/9911) ===
===

=== removing word competent (5145/9911) ===
=== removing word ambulatory (5146/9911) ===
=== removing word recycled (5147/9911) ===
=== removing word smidgen (5148/9911) ===
=== removing word untidiness (5149/9911) ===
=== removing word contend (5150/9911) ===
=== removing word writting (5151/9911) ===
=== removing word setbacks (5152/9911) ===
=== removing word superpowers (5153/9911) ===
=== removing word lunchtime (5154/9911) ===
=== removing word poppy (5155/9911) ===
=== removing word merit (5156/9911) ===
=== removing word gaming (5157/9911) ===
=== removing word thatrupert (5158/9911) ===
=== removing word gardening (5159/9911) ===
=== removing word curb (5160/9911) ===
=== removing word adomen (5161/9911) ===
=== removing word scariest (5162/9911) ===
=== removing word schmidt (5163/9911) ===
=== removing word roasted (5164/9911) ===
=== removing word slr (5165/9911) ===
=== removing word dial (5166/9911) ===
=== removing word dotting (5167/9911) ===
=== removing word offspring

=== removing word graham (5349/9911) ===
=== removing word greatly (5350/9911) ===
=== removing word ranging (5351/9911) ===
=== removing word beatles (5352/9911) ===
=== removing word louis (5353/9911) ===
=== removing word deferring (5354/9911) ===
=== removing word fraudulent (5355/9911) ===
=== removing word towed (5356/9911) ===
=== removing word chalice (5357/9911) ===
=== removing word uninspiring (5358/9911) ===
=== removing word michaels (5359/9911) ===
=== removing word rip (5360/9911) ===
=== removing word sharknado (5361/9911) ===
=== removing word proported (5362/9911) ===
=== removing word withdraw (5363/9911) ===
=== removing word announcing (5364/9911) ===
=== removing word disregarding (5365/9911) ===
=== removing word braided (5366/9911) ===
=== removing word animosity (5367/9911) ===
=== removing word mun (5368/9911) ===
=== removing word screenshot (5369/9911) ===
=== removing word maeve (5370/9911) ===
=== removing word kilter (5371/9911) ===
=== removing word scre

=== removing word skeleton (5553/9911) ===
=== removing word affections (5554/9911) ===
=== removing word unbelievably (5555/9911) ===
=== removing word reelected (5556/9911) ===
=== removing word trolley (5557/9911) ===
=== removing word obesity (5558/9911) ===
=== removing word misspelt (5559/9911) ===
=== removing word happenstances (5560/9911) ===
=== removing word dee (5561/9911) ===
=== removing word javascript (5562/9911) ===
=== removing word swallowed (5563/9911) ===
=== removing word chipmunk (5564/9911) ===
=== removing word salutary (5565/9911) ===
=== removing word universally (5566/9911) ===
=== removing word dripping (5567/9911) ===
=== removing word delightful (5568/9911) ===
=== removing word uncomposed (5569/9911) ===
=== removing word ptpt (5570/9911) ===
=== removing word fatally (5571/9911) ===
=== removing word sparkly (5572/9911) ===
=== removing word tazi (5573/9911) ===
=== removing word certainty (5574/9911) ===
=== removing word trigger (5575/9911) ===
=== re

=== removing word angeles (5749/9911) ===
=== removing word rainboots (5750/9911) ===
=== removing word deceive (5751/9911) ===
=== removing word bathing (5752/9911) ===
=== removing word exaggerating (5753/9911) ===
=== removing word inverted (5754/9911) ===
=== removing word fundamentalists (5755/9911) ===
=== removing word recognized (5756/9911) ===
=== removing word glamour (5757/9911) ===
=== removing word atheist (5758/9911) ===
=== removing word bu (5759/9911) ===
=== removing word contaminated (5760/9911) ===
=== removing word wap (5761/9911) ===
=== removing word responded (5762/9911) ===
=== removing word watcher (5763/9911) ===
=== removing word outlet (5764/9911) ===
=== removing word pill (5765/9911) ===
=== removing word insect (5766/9911) ===
=== removing word berries (5767/9911) ===
=== removing word separates (5768/9911) ===
=== removing word opted (5769/9911) ===
=== removing word flush (5770/9911) ===
=== removing word ravioli (5771/9911) ===
=== removing word belle 

=== removing word burns (5956/9911) ===
=== removing word haunt (5957/9911) ===
=== removing word staked (5958/9911) ===
=== removing word puppies (5959/9911) ===
=== removing word gaga (5960/9911) ===
=== removing word dashes (5961/9911) ===
=== removing word tor (5962/9911) ===
=== removing word thundershowers (5963/9911) ===
=== removing word seroquel (5964/9911) ===
=== removing word eagerly (5965/9911) ===
=== removing word manicure (5966/9911) ===
=== removing word neuropathy (5967/9911) ===
=== removing word duped (5968/9911) ===
=== removing word balancing (5969/9911) ===
=== removing word universalized (5970/9911) ===
=== removing word daydreaming (5971/9911) ===
=== removing word charlies (5972/9911) ===
=== removing word banana (5973/9911) ===
=== removing word heureulsurok (5974/9911) ===
=== removing word moisture (5975/9911) ===
=== removing word polishes (5976/9911) ===
=== removing word maury (5977/9911) ===
=== removing word snidey (5978/9911) ===
=== removing word atr

=== removing word mascaras (6151/9911) ===
=== removing word animesh (6152/9911) ===
=== removing word ryeowook (6153/9911) ===
=== removing word cohesive (6154/9911) ===
=== removing word pedals (6155/9911) ===
=== removing word compounds (6156/9911) ===
=== removing word soulation (6157/9911) ===
=== removing word warmest (6158/9911) ===
=== removing word attentions (6159/9911) ===
=== removing word poles (6160/9911) ===
=== removing word disagreement (6161/9911) ===
=== removing word angee (6162/9911) ===
=== removing word zonisamide (6163/9911) ===
=== removing word shortlisted (6164/9911) ===
=== removing word pajama (6165/9911) ===
=== removing word psychologists (6166/9911) ===
=== removing word dharavi (6167/9911) ===
=== removing word dung (6168/9911) ===
=== removing word knots (6169/9911) ===
=== removing word heroes (6170/9911) ===
=== removing word westerner (6171/9911) ===
=== removing word gimmicks (6172/9911) ===
=== removing word sorrows (6173/9911) ===
=== removing wo

=== removing word genealogical (6353/9911) ===
=== removing word bein (6354/9911) ===
=== removing word cantor (6355/9911) ===
=== removing word broaden (6356/9911) ===
=== removing word pristine (6357/9911) ===
=== removing word handbook (6358/9911) ===
=== removing word ivy (6359/9911) ===
=== removing word commodore (6360/9911) ===
=== removing word soundtrack (6361/9911) ===
=== removing word selected (6362/9911) ===
=== removing word lobbying (6363/9911) ===
=== removing word kaikohe (6364/9911) ===
=== removing word xox (6365/9911) ===
=== removing word bull (6366/9911) ===
=== removing word athletic (6367/9911) ===
=== removing word happend (6368/9911) ===
=== removing word sickly (6369/9911) ===
=== removing word dallas (6370/9911) ===
=== removing word tougher (6371/9911) ===
=== removing word reckless (6372/9911) ===
=== removing word fluids (6373/9911) ===
=== removing word unibrow (6374/9911) ===
=== removing word cloudy (6375/9911) ===
=== removing word offending (6376/991

=== removing word powell (6557/9911) ===
=== removing word antm (6558/9911) ===
=== removing word derp (6559/9911) ===
=== removing word disjointed (6560/9911) ===
=== removing word colorado (6561/9911) ===
=== removing word taryns (6562/9911) ===
=== removing word wailing (6563/9911) ===
=== removing word wrongfully (6564/9911) ===
=== removing word nitche (6565/9911) ===
=== removing word werner (6566/9911) ===
=== removing word growled (6567/9911) ===
=== removing word wardrobe (6568/9911) ===
=== removing word precisely (6569/9911) ===
=== removing word beds (6570/9911) ===
=== removing word misused (6571/9911) ===
=== removing word nuance (6572/9911) ===
=== removing word theybf (6573/9911) ===
=== removing word artificial (6574/9911) ===
=== removing word ransom (6575/9911) ===
=== removing word afaerytaleinmakebelieve (6576/9911) ===
=== removing word bullet (6577/9911) ===
=== removing word pompoms (6578/9911) ===
=== removing word concieve (6579/9911) ===
=== removing word cul

=== removing word climaxed (6760/9911) ===
=== removing word iq (6761/9911) ===
=== removing word depict (6762/9911) ===
=== removing word existence (6763/9911) ===
=== removing word shire (6764/9911) ===
=== removing word venturing (6765/9911) ===
=== removing word ineffective (6766/9911) ===
=== removing word niko (6767/9911) ===
=== removing word overtly (6768/9911) ===
=== removing word roy (6769/9911) ===
=== removing word screening (6770/9911) ===
=== removing word undutiful (6771/9911) ===
=== removing word bloodshed (6772/9911) ===
=== removing word excellence (6773/9911) ===
=== removing word flies (6774/9911) ===
=== removing word adapt (6775/9911) ===
=== removing word kierkegaard (6776/9911) ===
=== removing word sickened (6777/9911) ===
=== removing word scheduling (6778/9911) ===
=== removing word anklets (6779/9911) ===
=== removing word dissatisfactions (6780/9911) ===
=== removing word momentum (6781/9911) ===
=== removing word celebrity (6782/9911) ===
=== removing wo

=== removing word nervs (6972/9911) ===
=== removing word disc (6973/9911) ===
=== removing word direspected (6974/9911) ===
=== removing word reaffirmed (6975/9911) ===
=== removing word indigenous (6976/9911) ===
=== removing word springy (6977/9911) ===
=== removing word hedge (6978/9911) ===
=== removing word benadryl (6979/9911) ===
=== removing word tibris (6980/9911) ===
=== removing word marlowe (6981/9911) ===
=== removing word fixture (6982/9911) ===
=== removing word feather (6983/9911) ===
=== removing word fluidly (6984/9911) ===
=== removing word nationalism (6985/9911) ===
=== removing word securities (6986/9911) ===
=== removing word originally (6987/9911) ===
=== removing word pleasures (6988/9911) ===
=== removing word cities (6989/9911) ===
=== removing word deeds (6990/9911) ===
=== removing word oakwood (6991/9911) ===
=== removing word immigrants (6992/9911) ===
=== removing word semuanya (6993/9911) ===
=== removing word indigo (6994/9911) ===
=== removing word c

=== removing word location (7171/9911) ===
=== removing word ovation (7172/9911) ===
=== removing word referee (7173/9911) ===
=== removing word combine (7174/9911) ===
=== removing word treader (7175/9911) ===
=== removing word believer (7176/9911) ===
=== removing word holyhead (7177/9911) ===
=== removing word finnerty (7178/9911) ===
=== removing word miracle (7179/9911) ===
=== removing word spurring (7180/9911) ===
=== removing word accepts (7181/9911) ===
=== removing word situated (7182/9911) ===
=== removing word jovi (7183/9911) ===
=== removing word eater (7184/9911) ===
=== removing word dehydrated (7185/9911) ===
=== removing word lowly (7186/9911) ===
=== removing word pooped (7187/9911) ===
=== removing word marbles (7188/9911) ===
=== removing word luscious (7189/9911) ===
=== removing word montana (7190/9911) ===
=== removing word emoexaderistic (7191/9911) ===
=== removing word gears (7192/9911) ===
=== removing word indulged (7193/9911) ===
=== removing word bruises 

=== removing word zooms (7377/9911) ===
=== removing word addled (7378/9911) ===
=== removing word ethnicity (7379/9911) ===
=== removing word recommendation (7380/9911) ===
=== removing word hips (7381/9911) ===
=== removing word pouting (7382/9911) ===
=== removing word bunnies (7383/9911) ===
=== removing word filling (7384/9911) ===
=== removing word flirtiness (7385/9911) ===
=== removing word carrry (7386/9911) ===
=== removing word pistols (7387/9911) ===
=== removing word protecting (7388/9911) ===
=== removing word artifically (7389/9911) ===
=== removing word unresponsive (7390/9911) ===
=== removing word feelbut (7391/9911) ===
=== removing word owen (7392/9911) ===
=== removing word pandora (7393/9911) ===
=== removing word storytelling (7394/9911) ===
=== removing word marching (7395/9911) ===
=== removing word zenden (7396/9911) ===
=== removing word cools (7397/9911) ===
=== removing word lip (7398/9911) ===
=== removing word rearrange (7399/9911) ===
=== removing word m

=== removing word meditative (7572/9911) ===
=== removing word buffy (7573/9911) ===
=== removing word bench (7574/9911) ===
=== removing word detergent (7575/9911) ===
=== removing word sustainability (7576/9911) ===
=== removing word whizzes (7577/9911) ===
=== removing word misses (7578/9911) ===
=== removing word vey (7579/9911) ===
=== removing word fluttering (7580/9911) ===
=== removing word nathan (7581/9911) ===
=== removing word gilbert (7582/9911) ===
=== removing word apologetic (7583/9911) ===
=== removing word blundering (7584/9911) ===
=== removing word draftbloger (7585/9911) ===
=== removing word fifty (7586/9911) ===
=== removing word incredulity (7587/9911) ===
=== removing word digress (7588/9911) ===
=== removing word shuffle (7589/9911) ===
=== removing word prestige (7590/9911) ===
=== removing word grandmothers (7591/9911) ===
=== removing word sumthg (7592/9911) ===
=== removing word dis (7593/9911) ===
=== removing word repairman (7594/9911) ===
=== removing w

=== removing word thicker (7780/9911) ===
=== removing word jdelivery (7781/9911) ===
=== removing word burdening (7782/9911) ===
=== removing word characteristic (7783/9911) ===
=== removing word spritzer (7784/9911) ===
=== removing word signifies (7785/9911) ===
=== removing word unconscious (7786/9911) ===
=== removing word yunhos (7787/9911) ===
=== removing word willfully (7788/9911) ===
=== removing word quarters (7789/9911) ===
=== removing word provocative (7790/9911) ===
=== removing word merson (7791/9911) ===
=== removing word platonic (7792/9911) ===
=== removing word torward (7793/9911) ===
=== removing word qaf (7794/9911) ===
=== removing word vein (7795/9911) ===
=== removing word daytime (7796/9911) ===
=== removing word trainable (7797/9911) ===
=== removing word rabbit (7798/9911) ===
=== removing word portrayed (7799/9911) ===
=== removing word funeral (7800/9911) ===
=== removing word overwhelms (7801/9911) ===
=== removing word kiddies (7802/9911) ===
=== removin

=== removing word chunky (7991/9911) ===
=== removing word aloof (7992/9911) ===
=== removing word frombut (7993/9911) ===
=== removing word articulate (7994/9911) ===
=== removing word spilled (7995/9911) ===
=== removing word cheerfully (7996/9911) ===
=== removing word superstitions (7997/9911) ===
=== removing word buys (7998/9911) ===
=== removing word dies (7999/9911) ===
=== removing word kudos (8000/9911) ===
=== removing word aforementioned (8001/9911) ===
=== removing word demotivate (8002/9911) ===
=== removing word assemble (8003/9911) ===
=== removing word experimenting (8004/9911) ===
=== removing word keenly (8005/9911) ===
=== removing word nostril (8006/9911) ===
=== removing word soaking (8007/9911) ===
=== removing word trimmings (8008/9911) ===
=== removing word psychopath (8009/9911) ===
=== removing word effectiveness (8010/9911) ===
=== removing word alternated (8011/9911) ===
=== removing word marginalised (8012/9911) ===
=== removing word evolving (8013/9911) =

=== removing word rmb (8193/9911) ===
=== removing word standby (8194/9911) ===
=== removing word unza (8195/9911) ===
=== removing word hyped (8196/9911) ===
=== removing word seagulls (8197/9911) ===
=== removing word metabolism (8198/9911) ===
=== removing word childrens (8199/9911) ===
=== removing word upholstered (8200/9911) ===
=== removing word fines (8201/9911) ===
=== removing word woop (8202/9911) ===
=== removing word lair (8203/9911) ===
=== removing word loudons (8204/9911) ===
=== removing word firming (8205/9911) ===
=== removing word bonham (8206/9911) ===
=== removing word cursing (8207/9911) ===
=== removing word deepest (8208/9911) ===
=== removing word jake (8209/9911) ===
=== removing word advent (8210/9911) ===
=== removing word chart (8211/9911) ===
=== removing word dissassociated (8212/9911) ===
=== removing word mira (8213/9911) ===
=== removing word neighbor (8214/9911) ===
=== removing word tyrant (8215/9911) ===
=== removing word asma (8216/9911) ===
=== r

=== removing word observer (8387/9911) ===
=== removing word lipbalms (8388/9911) ===
=== removing word looser (8389/9911) ===
=== removing word hyphen (8390/9911) ===
=== removing word odor (8391/9911) ===
=== removing word phyica (8392/9911) ===
=== removing word evolve (8393/9911) ===
=== removing word vellas (8394/9911) ===
=== removing word fails (8395/9911) ===
=== removing word alhamdulillah (8396/9911) ===
=== removing word scum (8397/9911) ===
=== removing word homage (8398/9911) ===
=== removing word bridget (8399/9911) ===
=== removing word shielding (8400/9911) ===
=== removing word bloke (8401/9911) ===
=== removing word allergy (8402/9911) ===
=== removing word flakiness (8403/9911) ===
=== removing word exfoliate (8404/9911) ===
=== removing word merge (8405/9911) ===
=== removing word decaf (8406/9911) ===
=== removing word favorable (8407/9911) ===
=== removing word pouhere (8408/9911) ===
=== removing word burgos (8409/9911) ===
=== removing word tiniest (8410/9911) =

=== removing word legal (8594/9911) ===
=== removing word gateway (8595/9911) ===
=== removing word ash (8596/9911) ===
=== removing word dearwendy (8597/9911) ===
=== removing word filmfare (8598/9911) ===
=== removing word contradictory (8599/9911) ===
=== removing word questionnaire (8600/9911) ===
=== removing word scrap (8601/9911) ===
=== removing word headachie (8602/9911) ===
=== removing word confrontational (8603/9911) ===
=== removing word phones (8604/9911) ===
=== removing word swung (8605/9911) ===
=== removing word hockey (8606/9911) ===
=== removing word cosmopolitian (8607/9911) ===
=== removing word headlock (8608/9911) ===
=== removing word climbing (8609/9911) ===
=== removing word unspecified (8610/9911) ===
=== removing word pelvis (8611/9911) ===
=== removing word banter (8612/9911) ===
=== removing word pacesetter (8613/9911) ===
=== removing word soothing (8614/9911) ===
=== removing word handheld (8615/9911) ===
=== removing word empowering (8616/9911) ===
===

=== removing word gmc (8802/9911) ===
=== removing word humblest (8803/9911) ===
=== removing word defiance (8804/9911) ===
=== removing word disorientated (8805/9911) ===
=== removing word entertainers (8806/9911) ===
=== removing word gameplay (8807/9911) ===
=== removing word tremors (8808/9911) ===
=== removing word frantically (8809/9911) ===
=== removing word pissy (8810/9911) ===
=== removing word whimsical (8811/9911) ===
=== removing word commonplace (8812/9911) ===
=== removing word taryn (8813/9911) ===
=== removing word overeat (8814/9911) ===
=== removing word buffed (8815/9911) ===
=== removing word masses (8816/9911) ===
=== removing word victoria (8817/9911) ===
=== removing word zendikar (8818/9911) ===
=== removing word judgment (8819/9911) ===
=== removing word terrifying (8820/9911) ===
=== removing word insatiable (8821/9911) ===
=== removing word slapping (8822/9911) ===
=== removing word anchorage (8823/9911) ===
=== removing word assure (8824/9911) ===
=== remov

=== removing word concentrated (8999/9911) ===
=== removing word parade (9000/9911) ===
=== removing word confessions (9001/9911) ===
=== removing word inquire (9002/9911) ===
=== removing word eloquence (9003/9911) ===
=== removing word laughs (9004/9911) ===
=== removing word maligned (9005/9911) ===
=== removing word foment (9006/9911) ===
=== removing word metres (9007/9911) ===
=== removing word toss (9008/9911) ===
=== removing word assuming (9009/9911) ===
=== removing word mobility (9010/9911) ===
=== removing word envision (9011/9911) ===
=== removing word pounded (9012/9911) ===
=== removing word clan (9013/9911) ===
=== removing word applebees (9014/9911) ===
=== removing word silky (9015/9911) ===
=== removing word scope (9016/9911) ===
=== removing word luftwaffe (9017/9911) ===
=== removing word wihtout (9018/9911) ===
=== removing word copies (9019/9911) ===
=== removing word swirling (9020/9911) ===
=== removing word strewn (9021/9911) ===
=== removing word irregardless

=== removing word trials (9245/9911) ===
=== removing word opener (9246/9911) ===
=== removing word splashes (9247/9911) ===
=== removing word unleashed (9248/9911) ===
=== removing word jahmene (9249/9911) ===
=== removing word subway (9250/9911) ===
=== removing word glenn (9251/9911) ===
=== removing word gastric (9252/9911) ===
=== removing word congratulations (9253/9911) ===
=== removing word regurgitate (9254/9911) ===
=== removing word spats (9255/9911) ===
=== removing word rejections (9256/9911) ===
=== removing word witless (9257/9911) ===
=== removing word snakes (9258/9911) ===
=== removing word dominant (9259/9911) ===
=== removing word intimated (9260/9911) ===
=== removing word gaping (9261/9911) ===
=== removing word obedience (9262/9911) ===
=== removing word museum (9263/9911) ===
=== removing word certifiably (9264/9911) ===
=== removing word jun (9265/9911) ===
=== removing word scientist (9266/9911) ===
=== removing word diktats (9267/9911) ===
=== removing word m

=== removing word phenomena (9451/9911) ===
=== removing word ins (9452/9911) ===
=== removing word dumplings (9453/9911) ===
=== removing word reactor (9454/9911) ===
=== removing word seedy (9455/9911) ===
=== removing word catsa (9456/9911) ===
=== removing word bowled (9457/9911) ===
=== removing word beware (9458/9911) ===
=== removing word invasion (9459/9911) ===
=== removing word cumbersome (9460/9911) ===
=== removing word marriages (9461/9911) ===
=== removing word mixing (9462/9911) ===
=== removing word bicycle (9463/9911) ===
=== removing word drastic (9464/9911) ===
=== removing word lounging (9465/9911) ===
=== removing word kook (9466/9911) ===
=== removing word peaches (9467/9911) ===
=== removing word il (9468/9911) ===
=== removing word headcold (9469/9911) ===
=== removing word dailies (9470/9911) ===
=== removing word happpy (9471/9911) ===
=== removing word cobwebs (9472/9911) ===
=== removing word mukerji (9473/9911) ===
=== removing word theif (9474/9911) ===
==

=== removing word minibus (9662/9911) ===
=== removing word paraphrase (9663/9911) ===
=== removing word revovles (9664/9911) ===
=== removing word narcissism (9665/9911) ===
=== removing word governments (9666/9911) ===
=== removing word chatter (9667/9911) ===
=== removing word flicking (9668/9911) ===
=== removing word educational (9669/9911) ===
=== removing word publicly (9670/9911) ===
=== removing word homely (9671/9911) ===
=== removing word impressionable (9672/9911) ===
=== removing word fro (9673/9911) ===
=== removing word thingy (9674/9911) ===
=== removing word dejected (9675/9911) ===
=== removing word initiation (9676/9911) ===
=== removing word matte (9677/9911) ===
=== removing word antidepressants (9678/9911) ===
=== removing word tychelle (9679/9911) ===
=== removing word managers (9680/9911) ===
=== removing word perpetually (9681/9911) ===
=== removing word unreliable (9682/9911) ===
=== removing word bogart (9683/9911) ===
=== removing word convenient (9684/9911)

=== removing word unpaid (9866/9911) ===
=== removing word lunches (9867/9911) ===
=== removing word niggling (9868/9911) ===
=== removing word quantity (9869/9911) ===
=== removing word biceps (9870/9911) ===
=== removing word hyperthyroidism (9871/9911) ===
=== removing word atention (9872/9911) ===
=== removing word strasbourg (9873/9911) ===
=== removing word mcslackerson (9874/9911) ===
=== removing word clutch (9875/9911) ===
=== removing word effin (9876/9911) ===
=== removing word torch (9877/9911) ===
=== removing word promises (9878/9911) ===
=== removing word pressuring (9879/9911) ===
=== removing word aku (9880/9911) ===
=== removing word stic (9881/9911) ===
=== removing word rocked (9882/9911) ===
=== removing word react (9883/9911) ===
=== removing word carpet (9884/9911) ===
=== removing word religions (9885/9911) ===
=== removing word moshav (9886/9911) ===
=== removing word bleed (9887/9911) ===
=== removing word retain (9888/9911) ===
=== removing word symbol (9889/

In [20]:
print(get_words_occurences(train_texts)["dramatic"])

=== Traitement des occurences de la phrase 0/16000
=== Traitement des occurences de la phrase 1/16000
=== Traitement des occurences de la phrase 2/16000
=== Traitement des occurences de la phrase 3/16000
=== Traitement des occurences de la phrase 4/16000
=== Traitement des occurences de la phrase 5/16000
=== Traitement des occurences de la phrase 6/16000
=== Traitement des occurences de la phrase 7/16000
=== Traitement des occurences de la phrase 8/16000
=== Traitement des occurences de la phrase 9/16000
=== Traitement des occurences de la phrase 10/16000
=== Traitement des occurences de la phrase 11/16000
=== Traitement des occurences de la phrase 12/16000
=== Traitement des occurences de la phrase 13/16000
=== Traitement des occurences de la phrase 14/16000
=== Traitement des occurences de la phrase 15/16000
=== Traitement des occurences de la phrase 16/16000
=== Traitement des occurences de la phrase 17/16000
=== Traitement des occurences de la phrase 18/16000
=== Traitement des occ

=== Traitement des occurences de la phrase 2293/16000
=== Traitement des occurences de la phrase 2294/16000
=== Traitement des occurences de la phrase 2295/16000
=== Traitement des occurences de la phrase 2296/16000
=== Traitement des occurences de la phrase 2297/16000
=== Traitement des occurences de la phrase 2298/16000
=== Traitement des occurences de la phrase 2299/16000
=== Traitement des occurences de la phrase 2300/16000
=== Traitement des occurences de la phrase 2301/16000
=== Traitement des occurences de la phrase 2302/16000
=== Traitement des occurences de la phrase 2303/16000
=== Traitement des occurences de la phrase 2304/16000
=== Traitement des occurences de la phrase 2305/16000
=== Traitement des occurences de la phrase 2306/16000
=== Traitement des occurences de la phrase 2307/16000
=== Traitement des occurences de la phrase 2308/16000
=== Traitement des occurences de la phrase 2309/16000
=== Traitement des occurences de la phrase 2310/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 4122/16000
=== Traitement des occurences de la phrase 4123/16000
=== Traitement des occurences de la phrase 4124/16000
=== Traitement des occurences de la phrase 4125/16000
=== Traitement des occurences de la phrase 4126/16000
=== Traitement des occurences de la phrase 4127/16000
=== Traitement des occurences de la phrase 4128/16000
=== Traitement des occurences de la phrase 4129/16000
=== Traitement des occurences de la phrase 4130/16000
=== Traitement des occurences de la phrase 4131/16000
=== Traitement des occurences de la phrase 4132/16000
=== Traitement des occurences de la phrase 4133/16000
=== Traitement des occurences de la phrase 4134/16000
=== Traitement des occurences de la phrase 4135/16000
=== Traitement des occurences de la phrase 4136/16000
=== Traitement des occurences de la phrase 4137/16000
=== Traitement des occurences de la phrase 4138/16000
=== Traitement des occurences de la phrase 4139/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 6518/16000
=== Traitement des occurences de la phrase 6519/16000
=== Traitement des occurences de la phrase 6520/16000
=== Traitement des occurences de la phrase 6521/16000
=== Traitement des occurences de la phrase 6522/16000
=== Traitement des occurences de la phrase 6523/16000
=== Traitement des occurences de la phrase 6524/16000
=== Traitement des occurences de la phrase 6525/16000
=== Traitement des occurences de la phrase 6526/16000
=== Traitement des occurences de la phrase 6527/16000
=== Traitement des occurences de la phrase 6528/16000
=== Traitement des occurences de la phrase 6529/16000
=== Traitement des occurences de la phrase 6530/16000
=== Traitement des occurences de la phrase 6531/16000
=== Traitement des occurences de la phrase 6532/16000
=== Traitement des occurences de la phrase 6533/16000
=== Traitement des occurences de la phrase 6534/16000
=== Traitement des occurences de la phrase 6535/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 8956/16000
=== Traitement des occurences de la phrase 8957/16000
=== Traitement des occurences de la phrase 8958/16000
=== Traitement des occurences de la phrase 8959/16000
=== Traitement des occurences de la phrase 8960/16000
=== Traitement des occurences de la phrase 8961/16000
=== Traitement des occurences de la phrase 8962/16000
=== Traitement des occurences de la phrase 8963/16000
=== Traitement des occurences de la phrase 8964/16000
=== Traitement des occurences de la phrase 8965/16000
=== Traitement des occurences de la phrase 8966/16000
=== Traitement des occurences de la phrase 8967/16000
=== Traitement des occurences de la phrase 8968/16000
=== Traitement des occurences de la phrase 8969/16000
=== Traitement des occurences de la phrase 8970/16000
=== Traitement des occurences de la phrase 8971/16000
=== Traitement des occurences de la phrase 8972/16000
=== Traitement des occurences de la phrase 8973/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 11328/16000
=== Traitement des occurences de la phrase 11329/16000
=== Traitement des occurences de la phrase 11330/16000
=== Traitement des occurences de la phrase 11331/16000
=== Traitement des occurences de la phrase 11332/16000
=== Traitement des occurences de la phrase 11333/16000
=== Traitement des occurences de la phrase 11334/16000
=== Traitement des occurences de la phrase 11335/16000
=== Traitement des occurences de la phrase 11336/16000
=== Traitement des occurences de la phrase 11337/16000
=== Traitement des occurences de la phrase 11338/16000
=== Traitement des occurences de la phrase 11339/16000
=== Traitement des occurences de la phrase 11340/16000
=== Traitement des occurences de la phrase 11341/16000
=== Traitement des occurences de la phrase 11342/16000
=== Traitement des occurences de la phrase 11343/16000
=== Traitement des occurences de la phrase 11344/16000
=== Traitement des occurences de la phrase 11345/16000
=== Traite

=== Traitement des occurences de la phrase 13702/16000
=== Traitement des occurences de la phrase 13703/16000
=== Traitement des occurences de la phrase 13704/16000
=== Traitement des occurences de la phrase 13705/16000
=== Traitement des occurences de la phrase 13706/16000
=== Traitement des occurences de la phrase 13707/16000
=== Traitement des occurences de la phrase 13708/16000
=== Traitement des occurences de la phrase 13709/16000
=== Traitement des occurences de la phrase 13710/16000
=== Traitement des occurences de la phrase 13711/16000
=== Traitement des occurences de la phrase 13712/16000
=== Traitement des occurences de la phrase 13713/16000
=== Traitement des occurences de la phrase 13714/16000
=== Traitement des occurences de la phrase 13715/16000
=== Traitement des occurences de la phrase 13716/16000
=== Traitement des occurences de la phrase 13717/16000
=== Traitement des occurences de la phrase 13718/16000
=== Traitement des occurences de la phrase 13719/16000
=== Traite

[(467, 8), (5344, 10), (8811, 5), (8963, 9), (11534, 5), (12229, 2)]


### Checkpoint des données prétraitées

In [118]:
def write_data(path, texts, emotions):
    with open(path, "w") as f:
        for i in range(len(texts)):
            f.write(";".join([texts[i], emotions[i]])+'\n')

In [119]:
write_data("train_removed_stop_and_rare.txt", [" ".join(phrase) for phrase in train_texts], train_emotions)

In [120]:
train_texts, train_emotions = open_file("train_removed_stop_and_rare.txt")
print(train_texts[1])

['can', 'feeling', 'hopeless', 'damned', 'hopeful', 'just', 'being', 'who', 'cares', 'awake']


### Calcul de TF-IDF de chaque mot 
Pour cette partie, on s'appuie sur : https://fr.wikipedia.org/wiki/TF-IDF, https://datascientest.com/tf-idf-intelligence-artificielle. En particulier, le deuxième document donne :  
$TF(i,j) = \frac{\log_2(1+Freq(i,j))}{log_2(L_j+1)}$ et  
$IDF(i) = \log(\frac{N_D}{f_i}+1)$  
avec $Freq(i,j)$ est le nombre d'occurence du mot i dans la phrase j, $N_D$ est le nombre total de phrases, $L_i$ la longueur de la phrase $i$ et $f_i$ le nombre de phrase contenant le mot $i$. Et finalement :  
$TF-ID(i,j)=TF(i,j)*ID(i)$

In [121]:
# Calcul de la grandeur TF-IDF de chaque mot et en chaque phrase.
# @return tfidfs {str: np.array(len(phrases))} retourne les tfidfs pour chaque mot dans chaque phrase
def tfidf(phrases, verbose=True):
    n_phrases = len(phrases) # Calcul du nombre de phrase total (N_D)
    phrases_length = [len(phrase) for phrase in phrases] # Calcul de la longueur de chaque phrase (L_j)
    phrases_length = np.array(phrases_length)
    
    # Récupération des occurences de chaque mot dans le dataset
    word_occurences = get_words_occurences(phrases, verbose)
    word_list = word_occurences.keys()
    
    tfidfs = dict.fromkeys(word_list)
    for word in word_list: # Pour chaque mot du dataset d'entrainement
        occurences = word_occurences[word]
        
        # calcul du nombre de fois ou le mot word apparait dans la phrase j (Freq(i,j))
        occurences_array = np.zeros((n_phrases))
        for i,j in occurences:
            occurences_array[i]+=1
            
        fi = np.sum(occurences_array != 0) # Nombre de phrase où le mot i apparaît (f_i)
        
        # Calcul de la valeur TF-IDF
        Tf = np.log(1+occurences_array)/(phrases_length+1)
        Idf = np.log(n_phrases/fi)
        Tfidf = Tf*Idf
        tfidfs[word] = Tfidf
    
    return tfidfs

In [122]:
Tfidfs = tfidf(train_texts)

=== Traitement des occurences de la phrase 1/16000
=== Traitement des occurences de la phrase 2/16000
=== Traitement des occurences de la phrase 3/16000
=== Traitement des occurences de la phrase 4/16000
=== Traitement des occurences de la phrase 5/16000
=== Traitement des occurences de la phrase 6/16000
=== Traitement des occurences de la phrase 7/16000
=== Traitement des occurences de la phrase 8/16000
=== Traitement des occurences de la phrase 9/16000
=== Traitement des occurences de la phrase 10/16000
=== Traitement des occurences de la phrase 11/16000
=== Traitement des occurences de la phrase 12/16000
=== Traitement des occurences de la phrase 13/16000
=== Traitement des occurences de la phrase 14/16000
=== Traitement des occurences de la phrase 15/16000
=== Traitement des occurences de la phrase 16/16000
=== Traitement des occurences de la phrase 17/16000
=== Traitement des occurences de la phrase 18/16000
=== Traitement des occurences de la phrase 19/16000
=== Traitement des oc

=== Traitement des occurences de la phrase 2189/16000
=== Traitement des occurences de la phrase 2190/16000
=== Traitement des occurences de la phrase 2191/16000
=== Traitement des occurences de la phrase 2192/16000
=== Traitement des occurences de la phrase 2193/16000
=== Traitement des occurences de la phrase 2194/16000
=== Traitement des occurences de la phrase 2195/16000
=== Traitement des occurences de la phrase 2196/16000
=== Traitement des occurences de la phrase 2197/16000
=== Traitement des occurences de la phrase 2198/16000
=== Traitement des occurences de la phrase 2199/16000
=== Traitement des occurences de la phrase 2200/16000
=== Traitement des occurences de la phrase 2201/16000
=== Traitement des occurences de la phrase 2202/16000
=== Traitement des occurences de la phrase 2203/16000
=== Traitement des occurences de la phrase 2204/16000
=== Traitement des occurences de la phrase 2205/16000
=== Traitement des occurences de la phrase 2206/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 4625/16000
=== Traitement des occurences de la phrase 4626/16000
=== Traitement des occurences de la phrase 4627/16000
=== Traitement des occurences de la phrase 4628/16000
=== Traitement des occurences de la phrase 4629/16000
=== Traitement des occurences de la phrase 4630/16000
=== Traitement des occurences de la phrase 4631/16000
=== Traitement des occurences de la phrase 4632/16000
=== Traitement des occurences de la phrase 4633/16000
=== Traitement des occurences de la phrase 4634/16000
=== Traitement des occurences de la phrase 4635/16000
=== Traitement des occurences de la phrase 4636/16000
=== Traitement des occurences de la phrase 4637/16000
=== Traitement des occurences de la phrase 4638/16000
=== Traitement des occurences de la phrase 4639/16000
=== Traitement des occurences de la phrase 4640/16000
=== Traitement des occurences de la phrase 4641/16000
=== Traitement des occurences de la phrase 4642/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 6975/16000
=== Traitement des occurences de la phrase 6976/16000
=== Traitement des occurences de la phrase 6977/16000
=== Traitement des occurences de la phrase 6978/16000
=== Traitement des occurences de la phrase 6979/16000
=== Traitement des occurences de la phrase 6980/16000
=== Traitement des occurences de la phrase 6981/16000
=== Traitement des occurences de la phrase 6982/16000
=== Traitement des occurences de la phrase 6983/16000
=== Traitement des occurences de la phrase 6984/16000
=== Traitement des occurences de la phrase 6985/16000
=== Traitement des occurences de la phrase 6986/16000
=== Traitement des occurences de la phrase 6987/16000
=== Traitement des occurences de la phrase 6988/16000
=== Traitement des occurences de la phrase 6989/16000
=== Traitement des occurences de la phrase 6990/16000
=== Traitement des occurences de la phrase 6991/16000
=== Traitement des occurences de la phrase 6992/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 9365/16000
=== Traitement des occurences de la phrase 9366/16000
=== Traitement des occurences de la phrase 9367/16000
=== Traitement des occurences de la phrase 9368/16000
=== Traitement des occurences de la phrase 9369/16000
=== Traitement des occurences de la phrase 9370/16000
=== Traitement des occurences de la phrase 9371/16000
=== Traitement des occurences de la phrase 9372/16000
=== Traitement des occurences de la phrase 9373/16000
=== Traitement des occurences de la phrase 9374/16000
=== Traitement des occurences de la phrase 9375/16000
=== Traitement des occurences de la phrase 9376/16000
=== Traitement des occurences de la phrase 9377/16000
=== Traitement des occurences de la phrase 9378/16000
=== Traitement des occurences de la phrase 9379/16000
=== Traitement des occurences de la phrase 9380/16000
=== Traitement des occurences de la phrase 9381/16000
=== Traitement des occurences de la phrase 9382/16000
=== Traitement des occurence

=== Traitement des occurences de la phrase 11734/16000
=== Traitement des occurences de la phrase 11735/16000
=== Traitement des occurences de la phrase 11736/16000
=== Traitement des occurences de la phrase 11737/16000
=== Traitement des occurences de la phrase 11738/16000
=== Traitement des occurences de la phrase 11739/16000
=== Traitement des occurences de la phrase 11740/16000
=== Traitement des occurences de la phrase 11741/16000
=== Traitement des occurences de la phrase 11742/16000
=== Traitement des occurences de la phrase 11743/16000
=== Traitement des occurences de la phrase 11744/16000
=== Traitement des occurences de la phrase 11745/16000
=== Traitement des occurences de la phrase 11746/16000
=== Traitement des occurences de la phrase 11747/16000
=== Traitement des occurences de la phrase 11748/16000
=== Traitement des occurences de la phrase 11749/16000
=== Traitement des occurences de la phrase 11750/16000
=== Traitement des occurences de la phrase 11751/16000
=== Traite

=== Traitement des occurences de la phrase 14004/16000
=== Traitement des occurences de la phrase 14005/16000
=== Traitement des occurences de la phrase 14006/16000
=== Traitement des occurences de la phrase 14007/16000
=== Traitement des occurences de la phrase 14008/16000
=== Traitement des occurences de la phrase 14009/16000
=== Traitement des occurences de la phrase 14010/16000
=== Traitement des occurences de la phrase 14011/16000
=== Traitement des occurences de la phrase 14012/16000
=== Traitement des occurences de la phrase 14013/16000
=== Traitement des occurences de la phrase 14014/16000
=== Traitement des occurences de la phrase 14015/16000
=== Traitement des occurences de la phrase 14016/16000
=== Traitement des occurences de la phrase 14017/16000
=== Traitement des occurences de la phrase 14018/16000
=== Traitement des occurences de la phrase 14019/16000
=== Traitement des occurences de la phrase 14020/16000
=== Traitement des occurences de la phrase 14021/16000
=== Traite

In [15]:
print(np.sum(Tfidfs["nice"])) # Pour montrer qu'il n'est pas nul partout

21.717124147205453


### Rembourrage et rognage des phrases 

Ici on cherche à harmoniser les données pour avoir des vecteurs de taille fixe. Dans le cas où le vecteur est trop court, on le rembourre avec le caractère "<unk>" et dans le cas où la phrase est trop longue on la rogne. On choisit par la suite une taille fixe de 10 mots par phrase.   
Il pourrait être interessant de comparer les résultats finaux (sous réserve que l'on obtienne des résultats) entre un choix de rembourrer à l'avant de la phrase ou à l'arrière ou bien de rogner par l'avant où l'arrière.

In [123]:
# @param liste : liste de mots à rembourer ou rogner  
# @param n_final : Taille finale que doit atteindre la liste
# @param char : charactère de rembourage
# rembourage_rognage(liste : [string], n_final : int, char : 'string') -> liste : [string]
def rembourrage_rognage(liste, n_final, char):
    if(len(liste) > n_final):
        return liste[:n_final]
    if(len(liste) < n_final):
        return liste + [char]*(n_final - len(liste))
    return liste

### Création du vocabulaire

Le vocabulaire est créé à partir du module torchtext.vocab. Lors de la création du dataset sous forme d'id, on créé par la même occasion le dataset des TF-IDF associés à chaque mot de chaque phrase que l'on viendra accoler à la représentation one-hot du dataset avant l'apprentissage.

In [124]:
# Fonction permettant de créer les vocabulaires à l'aide de torchtext vocab pour la bdd train et les émotions.
# get_vocab(texts : [[string]], emotions : [string]) -> text_vocab : torchtext.Vocab, emotion_vocab : torchtext.Vocab
def get_vocabs(texts, emotions):
    #vocabulaire text
    text_vocab = tv.build_vocab_from_iterator(iter(texts), specials = ["<unk>"])
    
    # Ajout index défaut
    unknown_id = text_vocab.forward(["<unk>"])
    text_vocab.set_default_index(unknown_id[0])
    
    #vocabulaire emotion
    emotion_vocab = tv.build_vocab_from_iterator(iter([[emotion] for emotion in emotions]))
    print(f"Text vocab is of size {len(text_vocab)}")
    return text_vocab, emotion_vocab

# forward_vocab(texts : [[string]], emotions : [string], text_vocab : torchtext.Vocab, emotion_vocab : torchtext.Vocab)
# -> text_id : torch.tensor(int, (n_data, sentence_length)), emotions_id : torch.tensor(int, (n_data))
def forward_vocab(
    texts, 
    emotions, 
    text_vocab, 
    emotion_vocab, 
    tfidfs_id
):
    unknown_id = text_vocab.forward(["<unk>"])[0]
    sentence_length = 10 # On fixe un nombre maximal de mot à 10 par phrase
    
    # Calcul des id associés à chaque mot et rembourage des phrases
    texts_id = [text_vocab.forward(text) for text in texts]
    texts_id = [rembourrage_rognage(text, sentence_length, unknown_id) for text in texts_id]
    
    # Création du dataset avec les TF-IDF associés à chaque mot de chaque phrase
    data_tfidfs_id = []
    for i in range(len(texts_id)):
        data_tfidfs_id.append([tfidfs_id[word_id][i] for word_id in texts_id[i]])
    
    # Calcul des id associés à chaque émotion
    emotions_id = emotion_vocab.forward(emotions)
    
    # Mise en forme sous forme de tensor pytorch
    tfidfs_id = torch.tensor(data_tfidfs_id)
    texts_id = torch.tensor(texts_id)
    emotions_id = torch.tensor(emotions_id)
    
    return texts_id, emotions_id, tfidfs_id

# Permet de changer le dictionnaire des TF-IDF en une lilste de tableaux numpy dont chaque élement de la liste correspond
# au i-eme element selon le vocabulaire créé et chaque element j des tableaux est la TF-IDF du mot i associé à la phrase j
# @return tf_array [np.array(nphrases)]
def forward_Tf(Tfidfs, text_vocab, nphrases):
    tf_array = [np.zeros(nphrases)]*(len(text_vocab))
    for word in Tfidfs.keys():
        index = text_vocab.forward([word])[0]
        tf_array[index] = Tfidfs[word]
    return tf_array

In [125]:
text_vocab, emotion_vocab = get_vocabs(train_texts, train_emotions)

tfidfs_dict_id = forward_Tf(Tfidfs, text_vocab, len(train_texts))
train_texts_id, train_emotions_id, train_tfidfs_id = forward_vocab(
    train_texts, 
    train_emotions, 
    text_vocab, 
    emotion_vocab, 
    tfidfs_dict_id
)

Text vocab is of size 4928


In [126]:
print(train_tfidfs_id.size())

torch.Size([16000, 10])


### Conversion en one hot

In [127]:
emotions_one_hot = torch.nn.functional.one_hot(train_emotions_id, len(emotion_vocab))
emotions_ratio = torch.sum(emotions_one_hot, dim = 0)/torch.sum(emotions_one_hot)
print(emotions_ratio)

tensor([0.3351, 0.2916, 0.1349, 0.1211, 0.0815, 0.0358])


In [None]:
# Ne fonctionne pas sur ma machine car pas assez de mémoire, on va plutot encoder une nouvelle fois à chaque batch
texts_one_hot = torch.nn.functional.one_hot(train_texts_id, len(text_vocab))
texts_one_hot.size()
emotions_one_hot = torch.nn.functional.one_hot(train_emotions_id, len(emotion_vocab))

In [None]:
del texts_one_hot
del emotions_one_hot

# 2. Architecture du réseau

In [128]:
# Adapté de https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, emb_size):
        super(RNN, self).__init__()

        self.act = nn.ReLU()
        self.hidden_size = hidden_size
        self.i2e = nn.Linear(input_size, emb_size)
        self.i2h = nn.Linear(emb_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(emb_size + hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.i2e(input.float())
        combined = torch.cat((embedded, hidden), 1)
        combined = self.act(combined)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self, batch_size):
        hidden = torch.zeros(batch_size, self.hidden_size)
        return hidden.to(device)

In [129]:
# Essai avec un GRU
class RNN2(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, emb_size):
        super(RNN2, self).__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Linear(input_size, emb_size)
        self.gru = nn.GRU(emb_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.emb_size = emb_size
        self.softmax = nn.Softmax(dim=1)
    def forward(self, input, hidden):
        embedded = self.emb(input)
        output = embedded
        output, hidden = self.gru(output, hidden)
        output = self.i2o(output)[:, -1, :]
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self, batch_size):
        return torch.zeros(1, 15, self.hidden_size, device=device)

# 3. Préparation des batchs

In [130]:
# Construction du modèle (Cette cellule n'a pas besoin d'être lancée pour la suite)
n_hidden = 128
n_categories = len(emotion_vocab)
n_words = len(text_vocab)
emb_size = 100
rnn = RNN(n_words, n_hidden, n_categories, emb_size)

# Test du modèle avec un seul mot
word = text_vocab.forward(["cat"])
word = torch.tensor(word)
word = torch.nn.functional.one_hot(word, len(text_vocab))
hidden = torch.zeros(1, n_hidden)
print("output : ", rnn.forward(word, hidden))

output :  (tensor([[0.1613, 0.1673, 0.1695, 0.1783, 0.1613, 0.1623]],
       grad_fn=<SoftmaxBackward0>), tensor([[-0.0083, -0.0387, -0.0153, -0.0140,  0.0362, -0.0208, -0.0497,  0.0605,
         -0.0583,  0.0182, -0.0621,  0.0136,  0.0180,  0.0564,  0.0023, -0.0264,
         -0.0108,  0.0525, -0.0549,  0.0282, -0.0447, -0.0102,  0.0627, -0.0257,
          0.0393, -0.0252,  0.0411, -0.0390,  0.0116, -0.0205,  0.0303, -0.0199,
          0.0020,  0.0411, -0.0217,  0.0384,  0.0418, -0.0210,  0.0435,  0.0202,
         -0.0470,  0.0631,  0.0461,  0.0273, -0.0581, -0.0287, -0.0164,  0.0203,
          0.0445,  0.0500,  0.0189, -0.0098, -0.0129,  0.0296, -0.0456, -0.0184,
          0.0319, -0.0138, -0.0208, -0.0625,  0.0178, -0.0307,  0.0216, -0.0236,
         -0.0216, -0.0268,  0.0504,  0.0365, -0.0233,  0.0279,  0.0595,  0.0416,
         -0.0216,  0.0482,  0.0257,  0.0444, -0.0418,  0.0268,  0.0115,  0.0560,
         -0.0326,  0.0260,  0.0397, -0.0333,  0.0303, -0.0129, -0.0292,  0.0614,
   

# 4. Apprentissage du réseau

In [131]:
def compute_accuracy(X, Y):
    max_index_X = torch.argmax(X, dim = 1)
    max_index_Y = torch.argmax(Y, dim = 1)
    acc = int(torch.sum(max_index_X==max_index_Y).item()/X.size()[0]*100)
    return acc

# forward_model
# @param model : RNN, optimizer: torch.optim.Optimizer
# @param X: torch.tensor(0-1, (batch_size, sentence_length, text_vocab_size)
# @return output: torch.tensor(float, (batch_size, emotion_vocab_length))
def forward_model(model, X):
    hidden = model.initHidden(X.size()[0])
#     output, hidden = model(X, hidden) # Dans le cas où on utilise GRU : décommenter cette ligne et commenter celle du dessous
    for i in range(X.size()[1]):
        output, hidden = model(X[:,i,:], hidden)
    return output

# train_batch
# @param Y: torch.tensor(0-1, (batch_size, emotion_vocab_size))
# @param X: torch.tensor(0-1, (batch_size, sentence_length, text_vocab_size))
# @param learning_rate : float
# @param criterion: Loss function, 
# @param optimizer: torch.optim.Optimizer
def train_batch(Y, X, model, learning_rate, criterion, optimizer):
    
    model.zero_grad()
    output = forward_model(model, X)

    acc = compute_accuracy(output, Y)
#     print(output.size(), Y.size())
    loss = criterion(output, Y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return output, int(loss.item()*100)/100, acc

def train(
    model, 
    batch_size, 
    epochs, 
    X_id, 
    Y_id, 
    nXvocab, # Taille du vocabulaire de l'ensemble de test (utilisé pour l'encodage one_hot à la volée)
    nYvocab, # pareil pour les émotions
    learning_rate, 
    criterion, 
    optimizer, 
    early_stop = None, 
    tfidfs_id = None
):
    n_batch = len(X_id) // batch_size
    
    accuracies = []
    min_loss = float("inf")
    early_stop_counter = 0
    print("===== beginning training =====")
    for epoch in range(epochs):
        randperm = torch.randperm(X_id.size()[0]) # On remélange le dataset au début de chaque epoch pour réduire l'overfitting
        X_id = X_id[randperm]
        if(not (tfidfs_id is None)) :
            tfidfs_id = tfidfs_id[randperm]
        Y_id = Y_id[randperm]
        
        for batch in range(n_batch):
            # Création du batch d'entrainement
            XBatch = X_id[batch*batch_size:(batch+1)*batch_size]
            XBatch_one_hot = torch.nn.functional.one_hot(XBatch, nXvocab)
            
            # Dans le cas où on utilise les tfidfs on vient concaténer les valeurs après l'encodage one_hot eg : (0,0,1,0,+0.2)
            if(not (tfidfs_id is None)):
                tfidfsBatch = tfidfs_id[batch*batch_size:(batch+1)*batch_size]
                tfidfsBatch = tfidfsBatch[:, :, None]
                XBatch_one_hot = torch.cat((XBatch_one_hot, tfidfsBatch), 2)
            
            # Création des labels d'entrainement
            YBatch = Y_id[batch*batch_size:(batch+1)*batch_size]
            YBatch_one_hot = torch.nn.functional.one_hot(YBatch, nYvocab)
            emotions_factors = (emotions_ratio[YBatch])[:, None]
            YBatch_one_hot = YBatch_one_hot / emotions_factors
            
            output, loss, acc = train_batch(
                YBatch_one_hot.to(device).to(torch.float32), 
                XBatch_one_hot.to(device).to(torch.float32), 
                model, 
                learning_rate, 
                criterion, 
                optimizer
            )
            
            output = output * emotions_factors
            accuracies.append(acc)
            if(early_stop):
                if(loss > min_loss):
                    early_stop_counter += 1
                    if(early_stop_counter >= early_stop):
                        return
                else:
                    early_stop_counter = 0
                    min_loss = loss
            
        print(f'Epoch: {epoch+1}/{epochs}, Accuracy: {int(100*sum(accuracies)/len(accuracies))/100}%')

In [132]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [133]:
batch_size = 32
epochs = 500
n_data = len(train_texts_id)

n_hidden = 10
n_categories = len(emotion_vocab)
n_words = len(text_vocab)
emb_size = 30

learning_rate = 0.001
# early_stop = 200
criterion = nn.CrossEntropyLoss()
# criterion = nn.NLLLoss()

### Apprentissage classique

In [134]:
rnn = RNN(len(text_vocab), n_hidden, n_categories, emb_size)
print(f'The model has {count_parameters(rnn):,} trainable parameters')
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

train(
    rnn,
    batch_size, 
    epochs, 
    train_texts_id[:n_data], 
    train_emotions_id[:n_data], 
    len(text_vocab), 
    len(emotion_vocab), 
    learning_rate, 
    criterion, 
    optimizer
)

The model has 148,526 trainable parameters
===== beginning training =====
Epoch: 1/500, Accuracy: 16.05%
Epoch: 2/500, Accuracy: 25.54%
Epoch: 3/500, Accuracy: 31.24%
Epoch: 4/500, Accuracy: 34.91%
Epoch: 5/500, Accuracy: 36.64%
Epoch: 6/500, Accuracy: 39.74%
Epoch: 7/500, Accuracy: 42.45%
Epoch: 8/500, Accuracy: 44.94%
Epoch: 9/500, Accuracy: 47.94%
Epoch: 10/500, Accuracy: 50.99%
Epoch: 11/500, Accuracy: 53.75%


KeyboardInterrupt: 

### Apprentissage avec TF-IDF

In [135]:
rnn2 = RNN(len(text_vocab)+1, n_hidden, n_categories, emb_size) # +1 pour prendre en compte la valeur TF-IDF ajoutée
print(f'The model has {count_parameters(rnn2):,} trainable parameters')
optimizer = torch.optim.Adam(rnn2.parameters(), lr=learning_rate)

train(
    rnn2,
    batch_size, 
    epochs, 
    train_texts_id[:n_data], 
    train_emotions_id[:n_data], 
    len(text_vocab), 
    len(emotion_vocab), 
    learning_rate, 
    criterion, 
    optimizer,
    tfidfs_id = train_tfidfs_id # Rajout du dataset des tfidfs
)

The model has 148,556 trainable parameters
===== beginning training =====
Epoch: 1/500, Accuracy: 3.5%
Epoch: 2/500, Accuracy: 13.92%
Epoch: 3/500, Accuracy: 22.38%
Epoch: 4/500, Accuracy: 29.4%
Epoch: 5/500, Accuracy: 35.38%
Epoch: 6/500, Accuracy: 40.0%
Epoch: 7/500, Accuracy: 43.74%
Epoch: 8/500, Accuracy: 46.71%
Epoch: 9/500, Accuracy: 49.29%
Epoch: 10/500, Accuracy: 51.56%
Epoch: 11/500, Accuracy: 53.49%
Epoch: 12/500, Accuracy: 55.16%
Epoch: 13/500, Accuracy: 56.66%
Epoch: 14/500, Accuracy: 58.05%
Epoch: 15/500, Accuracy: 59.32%
Epoch: 16/500, Accuracy: 60.47%
Epoch: 17/500, Accuracy: 61.51%
Epoch: 18/500, Accuracy: 62.48%
Epoch: 19/500, Accuracy: 63.37%
Epoch: 20/500, Accuracy: 64.2%
Epoch: 21/500, Accuracy: 64.96%
Epoch: 22/500, Accuracy: 65.69%
Epoch: 23/500, Accuracy: 66.39%
Epoch: 24/500, Accuracy: 67.04%
Epoch: 25/500, Accuracy: 67.64%
Epoch: 26/500, Accuracy: 68.22%
Epoch: 27/500, Accuracy: 68.76%
Epoch: 28/500, Accuracy: 69.28%
Epoch: 29/500, Accuracy: 69.76%
Epoch: 30/50

KeyboardInterrupt: 

### Sauvegarde du modèle

In [136]:
n = torch.randint(99999, (1,))[0]  # On prie pour ne pas écraser un modèle existant
path = f"./rnn2-{n}"
torch.save(rnn2.state_dict(), path)
print(f"model saved under {path}")

model saved under ./rnn2-30086


# 5. Test du modèle

In [138]:
# Importation des données
test_texts, test_emotions = open_file("test.txt")

# Suppression des stop_words
remove_words(test_texts, test_emotions, stop_words)

# Suppression des mots rares de train (on est pas obligé de le faire ici)
remove_words(test_texts, test_emotions, rarest_words)

=== removing word x (1/728) ===
=== removing word y (2/728) ===
=== removing word your (3/728) ===
=== removing word yours (4/728) ===
=== removing word yourself (5/728) ===
=== removing word yourselves (6/728) ===
=== removing word you (7/728) ===
=== removing word yond (8/728) ===
=== removing word yonder (9/728) ===
=== removing word yon (10/728) ===
=== removing word ye (11/728) ===
=== removing word yet (12/728) ===
=== removing word z (13/728) ===
=== removing word zillion (14/728) ===
=== removing word j (15/728) ===
=== removing word u (16/728) ===
=== removing word umpteen (17/728) ===
=== removing word usually (18/728) ===
=== removing word us (19/728) ===
=== removing word username (20/728) ===
=== removing word uponed (21/728) ===
=== removing word upons (22/728) ===
=== removing word uponing (23/728) ===
=== removing word upon (24/728) ===
=== removing word ups (25/728) ===
=== removing word upping (26/728) ===
=== removing word upped (27/728) ===
=== removing word up (28/

=== removing word wallets (210/9911) ===
=== removing word bestselling (211/9911) ===
=== removing word map (212/9911) ===
=== removing word deo (213/9911) ===
=== removing word opiates (214/9911) ===
=== removing word sections (215/9911) ===
=== removing word gary (216/9911) ===
=== removing word macabre (217/9911) ===
=== removing word ti (218/9911) ===
=== removing word khezef (219/9911) ===
=== removing word piercing (220/9911) ===
=== removing word ativan (221/9911) ===
=== removing word anansi (222/9911) ===
=== removing word casserole (223/9911) ===
=== removing word fist (224/9911) ===
=== removing word titled (225/9911) ===
=== removing word coaxed (226/9911) ===
=== removing word glycemic (227/9911) ===
=== removing word enveloped (228/9911) ===
=== removing word variants (229/9911) ===
=== removing word reciprocated (230/9911) ===
=== removing word canadians (231/9911) ===
=== removing word swelling (232/9911) ===
=== removing word barbeque (233/9911) ===
=== removing word i

=== removing word pony (1210/9911) ===
=== removing word basement (1211/9911) ===
=== removing word loft (1212/9911) ===
=== removing word consultants (1213/9911) ===
=== removing word chajeul (1214/9911) ===
=== removing word outdoor (1215/9911) ===
=== removing word athe (1216/9911) ===
=== removing word judgments (1217/9911) ===
=== removing word sweater (1218/9911) ===
=== removing word geared (1219/9911) ===
=== removing word pc (1220/9911) ===
=== removing word voicing (1221/9911) ===
=== removing word lovebox (1222/9911) ===
=== removing word christchurch (1223/9911) ===
=== removing word freaky (1224/9911) ===
=== removing word deke (1225/9911) ===
=== removing word whinging (1226/9911) ===
=== removing word jerks (1227/9911) ===
=== removing word bask (1228/9911) ===
=== removing word scafell (1229/9911) ===
=== removing word overreacted (1230/9911) ===
=== removing word ther (1231/9911) ===
=== removing word idle (1232/9911) ===
=== removing word stumbling (1233/9911) ===
===

=== removing word becsuse (1567/9911) ===
=== removing word lasting (1568/9911) ===
=== removing word dials (1569/9911) ===
=== removing word keita (1570/9911) ===
=== removing word waistline (1571/9911) ===
=== removing word numbing (1572/9911) ===
=== removing word reagan (1573/9911) ===
=== removing word listing (1574/9911) ===
=== removing word turnovers (1575/9911) ===
=== removing word multiband (1576/9911) ===
=== removing word stoop (1577/9911) ===
=== removing word bontoc (1578/9911) ===
=== removing word colin (1579/9911) ===
=== removing word henrietta (1580/9911) ===
=== removing word missile (1581/9911) ===
=== removing word earl (1582/9911) ===
=== removing word orphans (1583/9911) ===
=== removing word moonlighting (1584/9911) ===
=== removing word outright (1585/9911) ===
=== removing word insides (1586/9911) ===
=== removing word menopause (1587/9911) ===
=== removing word majorly (1588/9911) ===
=== removing word lava (1589/9911) ===
=== removing word persevere (1590/

=== removing word ringtone (1818/9911) ===
=== removing word truffle (1819/9911) ===
=== removing word lipstick (1820/9911) ===
=== removing word elliptical (1821/9911) ===
=== removing word preached (1822/9911) ===
=== removing word unrest (1823/9911) ===
=== removing word affirmative (1824/9911) ===
=== removing word whiff (1825/9911) ===
=== removing word zooming (1826/9911) ===
=== removing word wanderlust (1827/9911) ===
=== removing word flirtiing (1828/9911) ===
=== removing word evey (1829/9911) ===
=== removing word thunk (1830/9911) ===
=== removing word flee (1831/9911) ===
=== removing word courtesy (1832/9911) ===
=== removing word crashed (1833/9911) ===
=== removing word fedotenko (1834/9911) ===
=== removing word superficial (1835/9911) ===
=== removing word assault (1836/9911) ===
=== removing word ashley (1837/9911) ===
=== removing word soldier (1838/9911) ===
=== removing word bloat (1839/9911) ===
=== removing word icy (1840/9911) ===
=== removing word slats (1841/

=== removing word undergraduate (2984/9911) ===
=== removing word clearboth (2985/9911) ===
=== removing word isaac (2986/9911) ===
=== removing word op (2987/9911) ===
=== removing word gust (2988/9911) ===
=== removing word practicies (2989/9911) ===
=== removing word chilling (2990/9911) ===
=== removing word carbohydrates (2991/9911) ===
=== removing word intercourse (2992/9911) ===
=== removing word mags (2993/9911) ===
=== removing word criticisms (2994/9911) ===
=== removing word generator (2995/9911) ===
=== removing word iterations (2996/9911) ===
=== removing word placement (2997/9911) ===
=== removing word equality (2998/9911) ===
=== removing word amorphous (2999/9911) ===
=== removing word wwii (3000/9911) ===
=== removing word conned (3001/9911) ===
=== removing word ruslan (3002/9911) ===
=== removing word reviewer (3003/9911) ===
=== removing word mat (3004/9911) ===
=== removing word validity (3005/9911) ===
=== removing word benedictine (3006/9911) ===
=== removing wo

=== removing word criminal (3380/9911) ===
=== removing word evgeni (3381/9911) ===
=== removing word pensive (3382/9911) ===
=== removing word minuscule (3383/9911) ===
=== removing word repay (3384/9911) ===
=== removing word charm (3385/9911) ===
=== removing word outrageously (3386/9911) ===
=== removing word illicit (3387/9911) ===
=== removing word guardian (3388/9911) ===
=== removing word perspiring (3389/9911) ===
=== removing word compelled (3390/9911) ===
=== removing word predecessor (3391/9911) ===
=== removing word spotlight (3392/9911) ===
=== removing word evaporate (3393/9911) ===
=== removing word vocab (3394/9911) ===
=== removing word personalities (3395/9911) ===
=== removing word comprehensive (3396/9911) ===
=== removing word armistice (3397/9911) ===
=== removing word owners (3398/9911) ===
=== removing word mri (3399/9911) ===
=== removing word mongoose (3400/9911) ===
=== removing word tomato (3401/9911) ===
=== removing word conclusions (3402/9911) ===
=== re

=== removing word rustie (4207/9911) ===
=== removing word teachable (4208/9911) ===
=== removing word consignment (4209/9911) ===
=== removing word unheard (4210/9911) ===
=== removing word hottest (4211/9911) ===
=== removing word partying (4212/9911) ===
=== removing word communing (4213/9911) ===
=== removing word feverish (4214/9911) ===
=== removing word performed (4215/9911) ===
=== removing word emanating (4216/9911) ===
=== removing word pussy (4217/9911) ===
=== removing word freezer (4218/9911) ===
=== removing word colombians (4219/9911) ===
=== removing word tail (4220/9911) ===
=== removing word gracias (4221/9911) ===
=== removing word klein (4222/9911) ===
=== removing word inescapable (4223/9911) ===
=== removing word cdm (4224/9911) ===
=== removing word rugmi (4225/9911) ===
=== removing word blazer (4226/9911) ===
=== removing word diabetes (4227/9911) ===
=== removing word exposures (4228/9911) ===
=== removing word stewarts (4229/9911) ===
=== removing word buddhi

=== removing word dent (5035/9911) ===
=== removing word purchasing (5036/9911) ===
=== removing word clammy (5037/9911) ===
=== removing word coolum (5038/9911) ===
=== removing word flourish (5039/9911) ===
=== removing word cricnepal (5040/9911) ===
=== removing word provider (5041/9911) ===
=== removing word coherent (5042/9911) ===
=== removing word hallmark (5043/9911) ===
=== removing word bioware (5044/9911) ===
=== removing word whitney (5045/9911) ===
=== removing word bagan (5046/9911) ===
=== removing word bury (5047/9911) ===
=== removing word expands (5048/9911) ===
=== removing word unilaterally (5049/9911) ===
=== removing word acclimated (5050/9911) ===
=== removing word jug (5051/9911) ===
=== removing word bikini (5052/9911) ===
=== removing word killjoy (5053/9911) ===
=== removing word el (5054/9911) ===
=== removing word ann (5055/9911) ===
=== removing word solitary (5056/9911) ===
=== removing word suppress (5057/9911) ===
=== removing word buzz (5058/9911) ===


=== removing word belongingness (5706/9911) ===
=== removing word backpacking (5707/9911) ===
=== removing word burner (5708/9911) ===
=== removing word aryiku (5709/9911) ===
=== removing word caveman (5710/9911) ===
=== removing word battery (5711/9911) ===
=== removing word builds (5712/9911) ===
=== removing word kaddish (5713/9911) ===
=== removing word issued (5714/9911) ===
=== removing word caf (5715/9911) ===
=== removing word unsung (5716/9911) ===
=== removing word roundabouts (5717/9911) ===
=== removing word inundated (5718/9911) ===
=== removing word squeek (5719/9911) ===
=== removing word lve (5720/9911) ===
=== removing word brains (5721/9911) ===
=== removing word intermittent (5722/9911) ===
=== removing word processes (5723/9911) ===
=== removing word unusually (5724/9911) ===
=== removing word dharma (5725/9911) ===
=== removing word evangelical (5726/9911) ===
=== removing word proclaiming (5727/9911) ===
=== removing word lateral (5728/9911) ===
=== removing word

=== removing word reckless (6372/9911) ===
=== removing word fluids (6373/9911) ===
=== removing word unibrow (6374/9911) ===
=== removing word cloudy (6375/9911) ===
=== removing word offending (6376/9911) ===
=== removing word activations (6377/9911) ===
=== removing word blip (6378/9911) ===
=== removing word pavement (6379/9911) ===
=== removing word haul (6380/9911) ===
=== removing word ii (6381/9911) ===
=== removing word deformed (6382/9911) ===
=== removing word cornbread (6383/9911) ===
=== removing word unwell (6384/9911) ===
=== removing word slacking (6385/9911) ===
=== removing word equals (6386/9911) ===
=== removing word ut (6387/9911) ===
=== removing word shifts (6388/9911) ===
=== removing word mil (6389/9911) ===
=== removing word remedy (6390/9911) ===
=== removing word jude (6391/9911) ===
=== removing word screenwriters (6392/9911) ===
=== removing word drunken (6393/9911) ===
=== removing word grandchildren (6394/9911) ===
=== removing word din (6395/9911) ===
=

=== removing word guily (7054/9911) ===
=== removing word lyman (7055/9911) ===
=== removing word wisely (7056/9911) ===
=== removing word located (7057/9911) ===
=== removing word agonized (7058/9911) ===
=== removing word penetrating (7059/9911) ===
=== removing word azul (7060/9911) ===
=== removing word nude (7061/9911) ===
=== removing word newcomers (7062/9911) ===
=== removing word plantar (7063/9911) ===
=== removing word approx (7064/9911) ===
=== removing word leo (7065/9911) ===
=== removing word melts (7066/9911) ===
=== removing word fangirls (7067/9911) ===
=== removing word funk (7068/9911) ===
=== removing word wryly (7069/9911) ===
=== removing word tremendously (7070/9911) ===
=== removing word bombed (7071/9911) ===
=== removing word cheesecake (7072/9911) ===
=== removing word quoted (7073/9911) ===
=== removing word edmontonians (7074/9911) ===
=== removing word housing (7075/9911) ===
=== removing word reports (7076/9911) ===
=== removing word ushering (7077/9911)

=== removing word gal (7704/9911) ===
=== removing word def (7705/9911) ===
=== removing word pleading (7706/9911) ===
=== removing word rucksack (7707/9911) ===
=== removing word maximize (7708/9911) ===
=== removing word eminent (7709/9911) ===
=== removing word avalanche (7710/9911) ===
=== removing word permitting (7711/9911) ===
=== removing word unsatisfied (7712/9911) ===
=== removing word weaved (7713/9911) ===
=== removing word canning (7714/9911) ===
=== removing word dm (7715/9911) ===
=== removing word crucified (7716/9911) ===
=== removing word everybodys (7717/9911) ===
=== removing word additionally (7718/9911) ===
=== removing word hapiness (7719/9911) ===
=== removing word coffin (7720/9911) ===
=== removing word vulturous (7721/9911) ===
=== removing word papamoka (7722/9911) ===
=== removing word quell (7723/9911) ===
=== removing word rejoice (7724/9911) ===
=== removing word cent (7725/9911) ===
=== removing word thn (7726/9911) ===
=== removing word chopped (7727/

=== removing word cred (8704/9911) ===
=== removing word undone (8705/9911) ===
=== removing word oppose (8706/9911) ===
=== removing word vortex (8707/9911) ===
=== removing word throws (8708/9911) ===
=== removing word moribund (8709/9911) ===
=== removing word magnate (8710/9911) ===
=== removing word pneumonia (8711/9911) ===
=== removing word bobbing (8712/9911) ===
=== removing word sifting (8713/9911) ===
=== removing word perfumes (8714/9911) ===
=== removing word recommending (8715/9911) ===
=== removing word scored (8716/9911) ===
=== removing word nikos (8717/9911) ===
=== removing word preaching (8718/9911) ===
=== removing word reverses (8719/9911) ===
=== removing word vexed (8720/9911) ===
=== removing word taunted (8721/9911) ===
=== removing word childlike (8722/9911) ===
=== removing word constructivism (8723/9911) ===
=== removing word auction (8724/9911) ===
=== removing word creativecommons (8725/9911) ===
=== removing word squirmed (8726/9911) ===
=== removing wor

=== removing word anthology (9203/9911) ===
=== removing word receivers (9204/9911) ===
=== removing word mistaken (9205/9911) ===
=== removing word timeline (9206/9911) ===
=== removing word reuse (9207/9911) ===
=== removing word psalm (9208/9911) ===
=== removing word bowel (9209/9911) ===
=== removing word customized (9210/9911) ===
=== removing word achievements (9211/9911) ===
=== removing word glance (9212/9911) ===
=== removing word frequent (9213/9911) ===
=== removing word materialistic (9214/9911) ===
=== removing word retired (9215/9911) ===
=== removing word jazzed (9216/9911) ===
=== removing word hints (9217/9911) ===
=== removing word booking (9218/9911) ===
=== removing word windmill (9219/9911) ===
=== removing word motorcyclist (9220/9911) ===
=== removing word continuity (9221/9911) ===
=== removing word riotousrambling (9222/9911) ===
=== removing word meditated (9223/9911) ===
=== removing word blinked (9224/9911) ===
=== removing word songy (9225/9911) ===
=== re

=== removing word equilibrium (9858/9911) ===
=== removing word ancestral (9859/9911) ===
=== removing word vacancy (9860/9911) ===
=== removing word ruthless (9861/9911) ===
=== removing word slipped (9862/9911) ===
=== removing word savingyourmarriagebeforeitstarts (9863/9911) ===
=== removing word latex (9864/9911) ===
=== removing word clarify (9865/9911) ===
=== removing word unpaid (9866/9911) ===
=== removing word lunches (9867/9911) ===
=== removing word niggling (9868/9911) ===
=== removing word quantity (9869/9911) ===
=== removing word biceps (9870/9911) ===
=== removing word hyperthyroidism (9871/9911) ===
=== removing word atention (9872/9911) ===
=== removing word strasbourg (9873/9911) ===
=== removing word mcslackerson (9874/9911) ===
=== removing word clutch (9875/9911) ===
=== removing word effin (9876/9911) ===
=== removing word torch (9877/9911) ===
=== removing word promises (9878/9911) ===
=== removing word pressuring (9879/9911) ===
=== removing word aku (9880/99

In [139]:
# Calcul des tfidfs
test_tfidfs_dict = tfidf(test_texts)
test_tfidfs_dict_id = forward_Tf(test_tfidfs_dict, text_vocab, len(test_texts))
test_texts_id, test_emotions_id, test_tfidfs_id = forward_vocab(
    test_texts, 
    test_emotions, 
    text_vocab,
    emotion_vocab, 
    test_tfidfs_dict_id
)

=== Traitement des occurences de la phrase 1/2000
=== Traitement des occurences de la phrase 2/2000
=== Traitement des occurences de la phrase 3/2000
=== Traitement des occurences de la phrase 4/2000
=== Traitement des occurences de la phrase 5/2000
=== Traitement des occurences de la phrase 6/2000
=== Traitement des occurences de la phrase 7/2000
=== Traitement des occurences de la phrase 8/2000
=== Traitement des occurences de la phrase 9/2000
=== Traitement des occurences de la phrase 10/2000
=== Traitement des occurences de la phrase 11/2000
=== Traitement des occurences de la phrase 12/2000
=== Traitement des occurences de la phrase 13/2000
=== Traitement des occurences de la phrase 14/2000
=== Traitement des occurences de la phrase 15/2000
=== Traitement des occurences de la phrase 16/2000
=== Traitement des occurences de la phrase 17/2000
=== Traitement des occurences de la phrase 18/2000
=== Traitement des occurences de la phrase 19/2000
=== Traitement des occurences de la phra

In [140]:
def test_model(model, texts_id, emotions_id, nXvocab, nYvocab, tfidfs_id = None):
    XBatch = texts_id
    XBatch_one_hot = torch.nn.functional.one_hot(XBatch, nXvocab)

    if(not (tfidfs_id is None)):
        tfidfsBatch = tfidfs_id
        tfidfsBatch = tfidfsBatch[:, :, None]
        XBatch_one_hot = torch.cat((XBatch_one_hot, tfidfsBatch), 2)

    YBatch = emotions_id
    YBatch_one_hot = torch.nn.functional.one_hot(YBatch, nYvocab)

    output = forward_model(model, XBatch_one_hot)
    acc = compute_accuracy(output, YBatch_one_hot)
    return output, acc

In [141]:
def calculate_confusion(output, expected_id):
    max_index_X = torch.argmax(output, dim = 1)
    max_index_Y = expected_id
    nemotions = output.size()[1]
    confusion = np.zeros((nemotions, nemotions))
    for i in range(nemotions):
        for j in range(nemotions):
            confusion[i, j] = torch.sum((max_index_X == i)*(max_index_Y == j)) # Si prédit i alors que la valeur attendue est j
    return confusion

In [142]:
output, acc = test_model(
    rnn2, 
    test_texts_id, 
    test_emotions_id, 
    len(text_vocab), 
    len(emotion_vocab), 
    test_tfidfs_id
)
print(acc)

66


In [143]:
calculate_confusion(output, test_emotions_id) # verticalement la classe prédite et horizontalement la classe réelle

array([[517.,  27.,  26.,  15.,  26.,  18.],
       [  8., 399.,  19.,   8.,   5.,   2.],
       [ 21.,  88., 147.,  23.,   2.,   1.],
       [ 60.,  52.,  73., 121.,   3.,   9.],
       [ 64.,   9.,   2.,   1., 119.,   0.],
       [ 25.,   6.,   8.,  56.,   4.,  36.]])

# 6. Test utilisateur final

Cette partie met juste en application le modèle final créé pour afficher les résultats de l'algorithme sous un ensemble aléatorie de données

In [144]:
# Chargement des données
test_texts, test_emotions = open_file("test.txt")

In [145]:
# Chargement du modèle 
model = RNN(len(text_vocab)+1, n_hidden, n_categories, emb_size)
checkpoint = torch.load("rnn2-36109") # Le modèle 36109 a atteint 70% d'accuracy en test
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [150]:
def show_random_prediction(
    model, 
    test_texts, 
    test_emotions, 
    stop_words, 
    rarest_words, 
    text_vocab, 
    emotion_vocab, 
    nsample):
    
    ntest = len(test_texts)
    random_samples = torch.randint(ntest, (nsample,))
    test_texts = [test_texts[random_samples[i]] for i in range(nsample)]
    test_emotions = [test_emotions[random_samples[i]] for i in range(nsample)]
    
    for i in range(nsample):
        print(f"=== Phrase {i} ===")
        test_text = test_texts[i]
        test_emotion = test_emotions[i]
        print(f"{' '.join(test_text)} : {test_emotion}")
        test_text = [test_text]
        test_emotion = [test_emotion]
        
        # Suppression des stop_words
        remove_words(test_text, test_emotion, stop_words, verbose = False)

        # Suppression des mots rares de train (on est pas obligé de le faire ici)
        remove_words(test_text, test_emotion, rarest_words, verbose = False)
        # Calcul des tfidfs
        test_tfidfs_dict = tfidf(test_text, verbose = False)
        test_tfidfs_dict_id = forward_Tf(test_tfidfs_dict, text_vocab, len(test_text))
        test_text_id, test_emotions_id, test_tfidfs_id = forward_vocab(
            test_text, 
            test_emotion, 
            text_vocab,
            emotion_vocab, 
            test_tfidfs_dict_id
        )
        output, _ = test_model(
            model, 
            test_text_id, 
            test_emotions_id, 
            len(text_vocab), 
            len(emotion_vocab), 
            test_tfidfs_id
        )
        max_index_X = torch.argmax(output, dim = 1)
        
        print(f"predicted : {emotion_vocab.lookup_token(max_index_X[0])}\n")

In [151]:
show_random_prediction(
    model,
    test_texts,
    test_emotions, 
    stop_words, 
    rarest_words,
    text_vocab,
    emotion_vocab,
    10
)

=== Phrase 0 ===
im feeling depressed again : sadness
predicted : sadness

=== Phrase 1 ===
i feel really uptight and unable to unwind : fear
predicted : fear

=== Phrase 2 ===
i went outside to shut in the hens then was tempted by the brilliance of the stars to walk across the frozen fields feeling very cold looking up into the sky : anger
predicted : sadness

=== Phrase 3 ===
i feel transcendant and splendid : joy
predicted : joy

=== Phrase 4 ===
i feel for my sweet boy : love
predicted : surprise

=== Phrase 5 ===
i feel more loyal to micah : love
predicted : love

=== Phrase 6 ===
i don t feel like i should be punished to carry this burden even though i have been for four years now : sadness
predicted : sadness

=== Phrase 7 ===
i have a feeling this is a bit naughty scanning an article from a magazine but i know that so many people would love to read thi : love
predicted : fear

=== Phrase 8 ===
i wonder if the homeowners would feel weird if i parked to gape at their landscaping 