In [3]:
import spacy
import timeit
import math
import pandas as pd
import matplotlib.pyplot as plt
from os import path
from collections import Counter
from lxml import etree
from glob import glob
from unicodedata import normalize
from tqdm import tqdm

In [48]:
nlp = spacy.load('fr_core_news_lg')

In [5]:
nlp.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x7fa3f27b3240>

In [67]:
nlp.max_length = 4000000

In [17]:
nlp.disable_pipes('tok2vec')

['tok2vec']

In [49]:
path_name = 'corpus_temp/*.txt'
#path_name = 'corpus_test/*.txt'
#path_name = 'corpus_main_txt/*.txt'
window = 1000
nombre_bigrammes = 100

In [6]:
def pipeline_spacy(path):
    pos_ko = ["NUM", "X", "SYM", "PUNCT", "SPACE"]
    list_lemma = []
    list_pos = []
    nombre_tokens = 0
    with open(path, encoding="utf8") as file:
        text = file.readlines()
        text_clean = clean_text(str(text).lower())
        docs = nlp(text_clean)
        nombre_tokens += len(docs)
        for token in docs:
            #si le token est bien un mot on récupère son lemme
            if token.pos_ not in pos_ko:
                list_lemma.append(token.lemma_)
                list_pos.append(token.pos_)

    return list_lemma, list_pos, nombre_tokens

In [10]:
def clean_text(txt):
    txt_res = normalize("NFKD", txt.replace('\xa0', ' '))
    txt_res = txt_res.replace('\\xa0', '')
    return txt_res

In [11]:
def bigrammize(list_token):
    """fonction qui prend en parametre une liste de tokens et retourne une liste de bi-grammes"""
    list_bigram = []
    for indice_token in range(len(list_token)-1):
        bigram = list_token[indice_token]+'_'+list_token[indice_token+1]
        list_bigram.append(bigram)
    return list_bigram

In [12]:
def trigrammize(list_token):
    """fonction qui prend en parametre une liste de tokens et retourne une liste de tri-grammes"""
    list_trigram = []
    for indice_token in range(len(list_token)-2):
        trigram = list_token[indice_token]+'_'+list_token[indice_token+1]+'_'+list_token[indice_token+2]
        list_trigram.append(trigram)
    return list_trigram

In [14]:
def rollingntokens(list_tokens, n):
    """fonction qui prend en parametre une liste de tokens et un nombre n et decoupe cette liste en fragments de n tokens puis retourne une liste de listes de tokens"""
    i = 0 # i stocke l'indice auquel on est dans le rolling
    list_rolling = []
    while i-n < len(list_tokens):
        list_rolling.append(list_tokens[i:i+n])
        i+=n
    return list_rolling 

In [15]:
def rolling_operationnalisation(rolling_list, window, doc_name, nombre_bigrammes, feature):
    """fonction qui prend en parametre une liste de liste de token  et une fenetre et calcul le ratio type-token pour chaque liste et retourne une liste de liste de valeurs"""
    i = 1
    list_type_token = []
    list_entropy = []
    for list_tokens in rolling_list:
        if type(list_tokens) == list:
            table_freq = Counter(list_tokens)
            list_type_token.append(type_token(table_freq, window))
            list_entropy.append(entropy(table_freq, window, nombre_bigrammes))
            #On stocke chaque table de frequences pour les chunks de 1000 mots dans des dataframmes
            zipped = list(zip(table_freq.keys(), table_freq.values()))
            df = pd.DataFrame(zipped, columns=[feature, 'Nombre d\'apparitions'])
            df['Frequence d\'apparition'] = round(df['Nombre d\'apparitions']/sum(table_freq.values()),5)
            df.to_csv(r'tables_frequences/'+feature+'/'+doc_name+'_'+feature+str(i)+'freq.csv', index = False)
            i+=1
        
    return list_type_token, list_entropy

In [16]:
"""fonction qui prend en parametre une table de frequence et retourne le calcul du ratio type-token"""
def type_token(table_freq, window):
    #test de verification sur les valeurs recuperees
    if sum(table_freq.values()) == window:
        #On calcule le type_token ratio et on le retourne
        return round(len(table_freq)/sum(table_freq.values()),5)

In [17]:
def entropy(table_freq, window, nombre_bigrammes):
    """fonction qui prend en parametre une table de frequence et un nombre de bigramme et retourne leur entropie"""
    # initialisation de l'indice de shannon
    shannon_sum = 0
    #test de verification sur les valeurs recuperees
    if sum(table_freq.values()) == window:
        #On calcule l'entropie et on la retourne
        table_freq_select = dict(table_freq.most_common(nombre_bigrammes))
        for bigram in table_freq_select.keys():
            prop = table_freq_select[bigram]/window
            shannon_sum += prop * (math.log(prop, 5))   
        return round(shannon_sum * -1,2)

In [13]:
def get_n_most_common_features(list_tokens, n):
    table_freq = dict(Counter(list_tokens).most_common(n))
    return list(table_freq.keys())

In [14]:
n_most_common_features = 1000

In [19]:
def moulinette(path_name, n):
    """fonction main qui utilise les fonctions précédentes et tourne sur le corpus"""
    
    i = 1
    nombre_total_tokens = 0
 
    list_lemma_global = []
    list_bigram_lemma_global = []
    list_trigram_lemma_global = []
    list_pos_global = []
    list_bigram_pos_global = []
    list_trigram_pos_global = []

    
    print("\n\nBEGIN PROCESSING CORPUS-----------")
    
    for doc in glob(path_name):
        
        print("\n\nBEGIN PROCESSING NOVEL-----------")

        
        doc_name = path.splitext(path.basename(doc))[0]
        date = doc_name.split("_")[0]
        print(doc_name)
        
        #On recupere le texte des romans sous forme de listes de lemmes et de pos grâce à spacy
        
        list_lemma_temp, list_pos_temp, nombre_tokens = pipeline_spacy(doc)
        
        print("PIPELINE SPACY ----------- OK")
        
        print("NOMBRE TOKENS = ", nombre_tokens)
        
        nombre_total_tokens += nombre_tokens
        
        list_lemma_global += list_lemma_temp
        list_bigram_lemma_global += bigrammize(list_lemma_temp)
        list_trigram_lemma_global += trigrammize(list_lemma_temp)
        
        list_pos_global += list_pos_temp
        list_bigram_pos_global += bigrammize(list_pos_temp)
        list_trigram_pos_global += trigrammize(list_pos_temp)
    
        i+=1

        print("END PROCESSING NOVEL --------------\n\n")
        print("PROGRESSION ", round(i/2,3),'% COMPLETED\n')
        
    print("\n GET LISTS RESULTS -----------")
    list_lemma_result = get_n_most_common_features(list_lemma_global, n)
    list_bigram_lemma_result = get_n_most_common_features(list_bigram_lemma_global, n)
    list_trigram_lemma_result = get_n_most_common_features(list_trigram_lemma_global, n)
    
    list_pos_result = get_n_most_common_features(list_pos_global, n)
    list_bigram_pos_result = get_n_most_common_features(list_bigram_pos_global, n)
    list_trigram_pos_result = get_n_most_common_features(list_trigram_pos_global, n)

        
    print("\n NOMBRE TOTAL TOKENS = ", nombre_total_tokens)
    print("\n RETURN LISTS RESULTS -----------")
    print("\n\n END PROCESSING CORPUS --------------\n\n")
            
    return list_lemma_result, list_bigram_lemma_result, list_trigram_lemma_result, list_pos_result, list_bigram_pos_result, list_trigram_pos_result

In [40]:
starttime = timeit.default_timer()
list_lemma_result, list_bigram_lemma_result, list_trigram_lemma_result, list_pos_result, list_bigram_pos_result, list_trigram_pos_result = moulinette(path_name, n_most_common_features) 
print("Le temps total d'execution en secondes est de : ", timeit.default_timer() - starttime)



BEGIN PROCESSING CORPUS-----------


BEGIN PROCESSING NOVEL-----------
2001_Nothomb-Amelie_Cosmetique-de-l-ennemi
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26259
END PROCESSING NOVEL --------------


PROGRESSION  1.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1864_Erckmann-Chatrian_Histoire-d-un-conscrit-de-1813
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  80653
END PROCESSING NOVEL --------------


PROGRESSION  1.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1866_Sand-George_Promenades-autour-d-un-village
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  51939
END PROCESSING NOVEL --------------


PROGRESSION  2.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1931_Simenon-Georges_Au-Rendez-vous-des-Terre-Neuvas
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  45650
END PROCESSING NOVEL --------------


PROGRESSION  2.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1888_Guy-de-Maupassant_Pierre-et-Jean
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  66802
END PROCES

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  46786
END PROCESSING NOVEL --------------


PROGRESSION  21.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1881_Berthet-Elie_Le-Charlatan
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  100838
END PROCESSING NOVEL --------------


PROGRESSION  21.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1899_Eekhoud-Georges_Escal-Vigor
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  55107
END PROCESSING NOVEL --------------


PROGRESSION  22.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1886_Daudet-Alphonse_La-Belle-Nivernaise
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  35650
END PROCESSING NOVEL --------------


PROGRESSION  22.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1928_Leroux-Gaston_Les-Mohicans-de-Babel
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  103745
END PROCESSING NOVEL --------------


PROGRESSION  23.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1840_Sand-George_Pauline
PIPELINE SPACY ----------- OK
NOMBRE 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  82498
END PROCESSING NOVEL --------------


PROGRESSION  41.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1931_Simenon-Georges_La-nuit-du-carrefour
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  45575
END PROCESSING NOVEL --------------


PROGRESSION  42.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1967_Simon-Claude_Histoire
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  119590
END PROCESSING NOVEL --------------


PROGRESSION  42.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1843_Feval-Paul_Le-loup-blanc
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  96094
END PROCESSING NOVEL --------------


PROGRESSION  43.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1952_San-Antonio_Mes-hommages-a-la-donzelle
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  44921
END PROCESSING NOVEL --------------


PROGRESSION  43.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1868_La-Rive-William-de_La-Marquise-de-Clerol-par-William-de-La-Ri

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  70951
END PROCESSING NOVEL --------------


PROGRESSION  62.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1881_Sand-George_Jeanne
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  125366
END PROCESSING NOVEL --------------


PROGRESSION  63.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1921_Renard-Maurice_L-Homme-Truque
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40560
END PROCESSING NOVEL --------------


PROGRESSION  63.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1900_Colette_La-retraite-sentimentale
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  55430
END PROCESSING NOVEL --------------


PROGRESSION  64.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1845_Balzac-Honore-de_Adieu
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  18068
END PROCESSING NOVEL --------------


PROGRESSION  64.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1884_Silvestre-Armand_En-pleine-fantaisie
PIPELINE SPACY ----------- OK
NOMBRE TOKEN

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  50570
END PROCESSING NOVEL --------------


PROGRESSION  83.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1922_Jaloux-Edmond_L-escalier-d-or
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  46799
END PROCESSING NOVEL --------------


PROGRESSION  84.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1897_Lemonnier-Camille_L-homme-en-amour
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  60455
END PROCESSING NOVEL --------------


PROGRESSION  84.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1920_Leroux-Gaston_Aventures-effroyables-de-M-Herbert-de-Renich_Tome-II-La-Bataille-invisible
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  106672
END PROCESSING NOVEL --------------


PROGRESSION  85.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1880_Stapleaux-Leopold_Le-pendu-de-la-Foret-Noire
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  111706
END PROCESSING NOVEL --------------


PROGRESSION  85.5 % COMPLETED



BEGIN PROCESSING NOVEL-

In [41]:
list_lemma_result

['le',
 'de',
 'un',
 'et',
 'il',
 'avoir',
 'à',
 'lui',
 'être',
 'son',
 'que',
 'l’',
 'ce',
 'je',
 'en',
 'd’',
 'qui',
 'pas',
 'ne',
 'vous',
 'se',
 'dans',
 'qu’',
 'tout',
 'pour',
 'faire',
 'dire',
 's’',
 'mon',
 'au',
 'éter',
 'n’',
 'sur',
 'plus',
 '-',
 'mais',
 'on',
 '–',
 'avec',
 'me',
 'par',
 'comme',
 'c’',
 'nous',
 'pouvoir',
 'si',
 'bien',
 'j’',
 'y',
 'voir',
 'aller',
 'même',
 'moi',
 'tu',
 'leur',
 'sans',
 'être',
 'm’',
 'vouloir',
 'savoir',
 'où',
 'venir',
 'homme',
 'autre',
 'ou',
 'à',
 'petit',
 'quelque',
 'prendre',
 'grand',
 'encore',
 'votre',
 'femme',
 'rien',
 'quand',
 'là',
 'main',
 'peu',
 'jour',
 'celui',
 'dont',
 'bon',
 'mettre',
 'aussi',
 'jeune',
 'heure',
 'cela',
 'devoir',
 'falloir',
 'été',
 'non',
 'croire',
 'temps',
 'oeil',
 'puis',
 'chose',
 'donc',
 'sous',
 'ça',
 'jamais',
 'Monsieur',
 'fois',
 'toujours',
 'passer',
 'notre',
 'après',
 'seul',
 'ton',
 'tête',
 'alors',
 'porte',
 'entendre',


In [42]:
list_bigram_lemma_result

['de_le',
 'dans_le',
 'd’_un',
 'qu’_il',
 'de_son',
 'à_le',
 'c’_être',
 'et_le',
 'sur_le',
 'que_le',
 'de_l’',
 'n’_avoir',
 'il_avoir',
 'et_de',
 'de_ce',
 'que_je',
 'j’_avoir',
 'tout_le',
 'je_ne',
 'y_avoir',
 'à_l’',
 'qu’_lui',
 'lui_avoir',
 'avoir_pas',
 'à_son',
 'par_le',
 'dans_un',
 'il_être',
 'il_ne',
 'avec_un',
 'avoir_un',
 'il_y',
 'il_se',
 'pour_le',
 'dans_son',
 'il_n’',
 'être_pas',
 'l’_avoir',
 'que_vous',
 'n’_être',
 'comme_un',
 'être_un',
 'je_être',
 'le_plus',
 'ne_pouvoir',
 'je_vous',
 'à_ce',
 'ce_être',
 'qu’_on',
 'et_il',
 'à_un',
 'vous_avoir',
 'de_mon',
 'c’_éter',
 'avoir_été',
 'un_peu',
 'il_falloir',
 'ce_que',
 'qui_avoir',
 'le_main',
 'de_tout',
 'je_me',
 'être_-ce',
 'avoir_le',
 'être_le',
 'et_je',
 'dire_il',
 'il_s’',
 'm’_avoir',
 'pas_de',
 'par_un',
 '-_être',
 'je_n’',
 'pouvoir_-',
 'qu’_un',
 'faire_un',
 'pas_le',
 'avoir_faire',
 'le_porte',
 'que_ce',
 'sur_son',
 'ce_qui',
 'avec_le',
 'dans_l’',
 'il_éter

In [43]:
list_trigram_lemma_result

['il_y_avoir',
 'n’_avoir_pas',
 'pouvoir_-_être',
 'n’_être_pas',
 'qu’_il_avoir',
 'je_n’_avoir',
 'lui_-_même',
 'c’_être_un',
 'que_j’_avoir',
 'il_n’_y',
 'n’_y_avoir',
 'ne_être_pas',
 'ce_qu’_il',
 'y_avoir_un',
 'n’_éter_pas',
 'il_n’_avoir',
 'ce_n’_être',
 'être_-ce_que',
 'je_ne_savoir',
 'c’_être_le',
 'ne_savoir_pas',
 'qu’_lui_avoir',
 'tout_à_coup',
 'qu’_il_ne',
 'au_milieu_de',
 'que_je_ne',
 'ne_vouloir_pas',
 'se_mettre_à',
 'qu’_il_n’',
 'de_tout_le',
 'ne_pouvoir_pas',
 'qu’_être_-ce',
 'ce_que_je',
 'qu’_il_être',
 'tout_le_monde',
 'c’_éter_un',
 'le_porte_de',
 'd’_un_voix',
 'n’_avoir_jamais',
 'je_ne_pouvoir',
 'le_jeune_fille',
 'être_-ce_pas',
 'lui_n’_avoir',
 'que_c’_être',
 'qu’_il_y',
 'je_l’_avoir',
 'n’_être_-ce',
 'de_ne_pas',
 'et_de_le',
 'que_je_être',
 'et_qu’_il',
 'un_de_ce',
 'à_l’_heure',
 'je_ne_être',
 'y_avoir_pas',
 'n’_avoir_plus',
 'ce_être_un',
 'je_me_être',
 'que_vous_avoir',
 'au_fond_de',
 'au_bout_de',
 'c’_éter_le',
 'avoi

In [44]:
list_pos_result

['NOUN',
 'VERB',
 'PRON',
 'ADP',
 'DET',
 'ADV',
 'ADJ',
 'PROPN',
 'CCONJ',
 'AUX',
 'SCONJ',
 'INTJ']

In [45]:
list_bigram_pos_result

['DET_NOUN',
 'PRON_VERB',
 'NOUN_ADP',
 'ADP_DET',
 'ADP_NOUN',
 'VERB_ADP',
 'NOUN_PRON',
 'VERB_DET',
 'PRON_PRON',
 'VERB_ADV',
 'NOUN_ADJ',
 'NOUN_VERB',
 'NOUN_DET',
 'NOUN_CCONJ',
 'VERB_PRON',
 'ADJ_NOUN',
 'PRON_AUX',
 'ADV_VERB',
 'NOUN_NOUN',
 'DET_ADJ',
 'ADP_PRON',
 'PRON_ADV',
 'SCONJ_PRON',
 'NOUN_ADV',
 'VERB_VERB',
 'AUX_VERB',
 'ADV_PRON',
 'ADP_VERB',
 'ADV_ADP',
 'ADJ_ADP',
 'CCONJ_PRON',
 'VERB_NOUN',
 'ADJ_PRON',
 'NOUN_PROPN',
 'ADV_DET',
 'ADP_PROPN',
 'ADV_ADV',
 'PROPN_PRON',
 'ADJ_DET',
 'ADV_ADJ',
 'VERB_SCONJ',
 'PROPN_ADP',
 'CCONJ_DET',
 'CCONJ_ADP',
 'PRON_ADP',
 'ADJ_CCONJ',
 'NOUN_SCONJ',
 'VERB_ADJ',
 'SCONJ_DET',
 'DET_PROPN',
 'ADP_ADJ',
 'VERB_PROPN',
 'PROPN_VERB',
 'AUX_ADV',
 'PRON_DET',
 'ADJ_VERB',
 'NOUN_AUX',
 'VERB_CCONJ',
 'ADP_ADV',
 'PROPN_PROPN',
 'PROPN_DET',
 'ADV_NOUN',
 'CCONJ_VERB',
 'ADV_SCONJ',
 'PROPN_NOUN',
 'PRON_NOUN',
 'PROPN_CCONJ',
 'ADV_AUX',
 'PROPN_ADV',
 'PRON_PROPN',
 'ADJ_ADV',
 'ADJ_ADJ',
 'CCONJ_ADV',
 'AUX_ADJ',
 

In [46]:
list_trigram_pos_result

['ADP_DET_NOUN',
 'DET_NOUN_ADP',
 'VERB_DET_NOUN',
 'NOUN_ADP_NOUN',
 'NOUN_ADP_DET',
 'PRON_PRON_VERB',
 'DET_NOUN_PRON',
 'PRON_VERB_ADP',
 'VERB_ADP_DET',
 'NOUN_PRON_VERB',
 'NOUN_DET_NOUN',
 'PRON_VERB_DET',
 'ADP_NOUN_ADP',
 'DET_NOUN_ADJ',
 'DET_NOUN_VERB',
 'DET_ADJ_NOUN',
 'PRON_VERB_ADV',
 'NOUN_PRON_PRON',
 'DET_NOUN_CCONJ',
 'DET_NOUN_DET',
 'VERB_ADP_NOUN',
 'PRON_AUX_VERB',
 'PRON_VERB_PRON',
 'ADP_NOUN_PRON',
 'DET_NOUN_NOUN',
 'VERB_PRON_VERB',
 'ADV_DET_NOUN',
 'PRON_VERB_VERB',
 'DET_NOUN_ADV',
 'ADV_PRON_VERB',
 'ADV_VERB_ADV',
 'PRON_ADV_VERB',
 'ADJ_DET_NOUN',
 'ADP_PRON_VERB',
 'NOUN_VERB_ADP',
 'CCONJ_DET_NOUN',
 'NOUN_ADJ_ADP',
 'ADP_NOUN_DET',
 'ADJ_NOUN_ADP',
 'PRON_PRON_PRON',
 'ADP_NOUN_CCONJ',
 'VERB_ADV_ADP',
 'PRON_PRON_AUX',
 'NOUN_VERB_DET',
 'ADP_DET_ADJ',
 'SCONJ_PRON_VERB',
 'SCONJ_DET_NOUN',
 'ADJ_ADP_DET',
 'NOUN_ADP_PRON',
 'NOUN_PRON_AUX',
 'ADP_VERB_DET',
 'NOUN_CCONJ_PRON',
 'VERB_ADP_PRON',
 'NOUN_ADP_VERB',
 'NOUN_ADP_PROPN',
 'VERB_PRON_PRO

In [50]:
starttime = timeit.default_timer()
list_lemma_result, list_bigram_lemma_result, list_trigram_lemma_result, list_pos_result, list_bigram_pos_result, list_trigram_pos_result = moulinette(path_name, n_most_common_features) 
print("Le temps total d'execution en secondes est de : ", timeit.default_timer() - starttime)



BEGIN PROCESSING CORPUS-----------


BEGIN PROCESSING NOVEL-----------
1829_Hugo-Victor_Le-dernier-jour-d-un-condamne
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40498
END PROCESSING NOVEL --------------


PROGRESSION  1.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1831_Signol-Alphonse-Macaire-Stanislas_Le-Chiffonnier_Tome-2
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  31732
END PROCESSING NOVEL --------------


PROGRESSION  1.5 % COMPLETED


 GET LISTS RESULTS -----------

 NOMBRE TOTAL TOKENS =  72230

 RETURN LISTS RESULTS -----------


 END PROCESSING CORPUS --------------


Le temps total d'execution en secondes est de :  28.75209702199936


In [51]:
list_trigram_pos_result

['ADP_DET_NOUN',
 'DET_NOUN_ADP',
 'VERB_DET_NOUN',
 'NOUN_ADP_NOUN',
 'NOUN_ADP_DET',
 'DET_NOUN_PRON',
 'PRON_PRON_VERB',
 'NOUN_DET_NOUN',
 'VERB_ADP_DET',
 'PRON_VERB_ADP',
 'NOUN_PRON_VERB',
 'ADP_NOUN_ADP',
 'PRON_VERB_DET',
 'DET_NOUN_ADJ',
 'NOUN_PRON_PRON',
 'DET_NOUN_VERB',
 'PRON_AUX_VERB',
 'DET_ADJ_NOUN',
 'DET_NOUN_DET',
 'DET_NOUN_CCONJ',
 'ADP_NOUN_PRON',
 'VERB_ADP_NOUN',
 'PRON_VERB_PRON',
 'PRON_VERB_ADV',
 'DET_NOUN_NOUN',
 'ADJ_DET_NOUN',
 'VERB_PRON_VERB',
 'NOUN_PRON_AUX',
 'PRON_VERB_VERB',
 'PRON_PRON_AUX',
 'ADV_DET_NOUN',
 'ADP_NOUN_CCONJ',
 'DET_NOUN_ADV',
 'ADP_NOUN_DET',
 'NOUN_VERB_ADP',
 'PRON_PRON_PRON',
 'ADV_PRON_VERB',
 'CCONJ_DET_NOUN',
 'ADJ_NOUN_ADP',
 'ADV_VERB_ADV',
 'ADP_DET_ADJ',
 'SCONJ_PRON_VERB',
 'PRON_ADV_VERB',
 'SCONJ_DET_NOUN',
 'NOUN_ADJ_ADP',
 'PRON_DET_NOUN',
 'NOUN_CCONJ_ADP',
 'AUX_VERB_ADP',
 'NOUN_VERB_DET',
 'VERB_ADV_ADP',
 'NOUN_CCONJ_PRON',
 'ADP_PRON_VERB',
 'NOUN_ADP_PRON',
 'AUX_VERB_DET',
 'VERB_ADP_PRON',
 'DET_NOUN_AUX

In [52]:
list_trigram_lemma_result

['il_y_avoir',
 'le_peine_de',
 'n’_avoir_pas',
 'être_-ce_que',
 'c’_être_un',
 'peine_de_mort',
 'que_c’_être',
 'n’_y_avoir',
 'n’_être_pas',
 'pouvoir_-_être',
 'il_n’_y',
 'y_avoir_un',
 'qu’_être_-ce',
 'avoir_-t_il',
 'qu’_il_avoir',
 'ce_n’_être',
 'qu’_il_y',
 'que_j’_avoir',
 'il_m’_avoir',
 'ce_qu’_il',
 'c’_être_le',
 'je_ne_savoir',
 'ne_être_pas',
 'le_porte_de',
 'il_falloir_que',
 'je_n’_avoir',
 'de_le_peine',
 'que_l’_on',
 'que_je_être',
 'qu’_il_être',
 'et_j’_avoir',
 'lui_avoir_je',
 'de_tout_le',
 'c’_éter_un',
 'vif_le_nation',
 'je_l’_avoir',
 'le_tête_de',
 'en_ce_moment',
 'à_travers_le',
 'de_le_prison',
 'je_me_être',
 'j’_avoir_faire',
 'à_l’_instant',
 'il_s’_être',
 'n’_avoir_pouvoir',
 'un_de_ce',
 'n’_avoir_plus',
 'tout_à_coup',
 'qu’_il_ne',
 'c’_éter_le',
 'c’_être_que',
 'y_avoir_pas',
 'au_milieu_de',
 'm’_avoir_-t',
 'l’_assemblée_national',
 'ne_vouloir_pas',
 'au_nom_de',
 'celui_-_là',
 'c’_être_de',
 'au_moment_où',
 'qu’_on_ne',
 '

## Motifs

In [21]:
def moulinette(path_name, n):
    """fonction main qui utilise les fonctions précédentes et tourne sur le corpus"""
    
    i = 1
    nombre_total_tokens = 0
    nombre_total_sentences = 0
 
    list_motif_global = []
    list_bigram_motif_global = []
    list_trigram_motif_global = []

    
    print("\n\nBEGIN PROCESSING CORPUS-----------")
    
    for doc in tqdm(glob(path_name)):
        
        print("\n\nBEGIN PROCESSING NOVEL-----------")

        
        doc_name = path.splitext(path.basename(doc))[0]
        date = doc_name.split("_")[0]
        print(doc_name)
        
        #On recupere le texte des romans sous forme de listes de lemmes et de pos grâce à spacy
        
        list_motif_temp, nombre_tokens, nombre_sentences = pipeline_spacy_motifs(doc)
        
        print("PIPELINE SPACY ----------- OK")
        
        print("NOMBRE TOKENS = ", nombre_tokens)
        
        nombre_total_tokens += nombre_tokens
        nombre_total_sentences += nombre_sentences
        
        list_motif_global += list_motif_temp
        list_bigram_motif_global += bigrammize(list_motif_temp)
        list_trigram_motif_global += trigrammize(list_motif_temp)
    
        i+=1

        print("END PROCESSING NOVEL --------------\n\n")
        print("PROGRESSION ", round(i/2,3),'% COMPLETED\n')
        
    print("\n GET LISTS RESULTS -----------")
    list_motif_result = get_n_most_common_features(list_motif_global, n)
    list_bigram_motif_result = get_n_most_common_features(list_bigram_motif_global, n)
    list_trigram_motif_result = get_n_most_common_features(list_trigram_motif_global, n)
        
    print("\n NOMBRE TOTAL TOKENS = ", nombre_total_tokens)
    print("\n NOMBRE TOTAL SENTENCES = ", nombre_total_sentences)

    print("\n RETURN LISTS RESULTS -----------")
    print("\n\n END PROCESSING CORPUS --------------\n\n")
            
    return list_motif_result, list_bigram_motif_result, list_trigram_motif_result

In [61]:
path_name = 'corpus_temp/*.txt'
#path_name = 'corpus_test/*.txt'
#path_name = 'corpus_main_txt/*.txt'
window = 1000
nombre_bigrammes = 100
n_most_common_features = 1000

In [63]:
def pipeline_spacy_motifs(path):
    pos_ko = ["NUM", "X", "SYM", "PUNCT", "SPACE"]
    list_motif = []
    nombre_tokens = 0
    nombre_sentences = 0
    with open(path, encoding="utf8") as file:
        text = file.readlines()
        text_clean = clean_text(str(text).lower())
        docs = nlp(text_clean)
        nombre_tokens += len(docs)
        nombre_sentences += len(list(docs.sents))
        for token in docs:
            #si le token est bien un mot on récupère son lemme
            if token.pos_ not in pos_ko:
                if token.is_stop:
                    list_motif.append(token.pos_)
                else:
                    list_motif.append(token.lemma_)

    return list_motif, nombre_tokens, nombre_sentences

In [68]:
starttime = timeit.default_timer()
list_motif_result, list_bigram_motif_result, list_trigram_motif_result = moulinette(path_name, n_most_common_features) 
print("Le temps total d'execution en secondes est de : ", timeit.default_timer() - starttime)



BEGIN PROCESSING CORPUS-----------


  0%|          | 0/200 [00:00<?, ?it/s]



BEGIN PROCESSING NOVEL-----------
2003_Echenoz-Jean_Au-Piano


  0%|          | 1/200 [00:09<32:45,  9.88s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  48753
END PROCESSING NOVEL --------------


PROGRESSION  1.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1863_Feval-Paul_La-Fille-du-Juif-Errant


  1%|          | 2/200 [00:25<43:28, 13.17s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62340
END PROCESSING NOVEL --------------


PROGRESSION  1.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Segur-Anatole-de_Les-Enfants-de-Paris


  2%|▏         | 3/200 [00:45<53:25, 16.27s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  65702
END PROCESSING NOVEL --------------


PROGRESSION  2.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1839_Stendhal_Le-coffre-et-le-revenant


  2%|▏         | 4/200 [00:46<33:42, 10.32s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  9787
END PROCESSING NOVEL --------------


PROGRESSION  2.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2012_Winckler-Martin_En-souvenir-d-Andre


  2%|▎         | 5/200 [00:52<28:29,  8.76s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  32711
END PROCESSING NOVEL --------------


PROGRESSION  3.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1882_Feval-Paul_Veillees-de-la-famille


  3%|▎         | 6/200 [01:23<53:03, 16.41s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  94950
END PROCESSING NOVEL --------------


PROGRESSION  3.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1837_Sand-George_La-derniere-Aldini


  4%|▎         | 7/200 [01:44<56:52, 17.68s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  74026
END PROCESSING NOVEL --------------


PROGRESSION  4.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1875_L-Epine-Ernest_a-coups-de-fusil


  4%|▍         | 8/200 [01:51<46:09, 14.43s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  38763
END PROCESSING NOVEL --------------


PROGRESSION  4.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2010_Echenoz-Jean_Des-eclairs


  4%|▍         | 9/200 [01:58<38:20, 12.05s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  35348
END PROCESSING NOVEL --------------


PROGRESSION  5.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1843_Balzac-Honore-de_Pierrette


  5%|▌         | 10/200 [02:13<40:57, 12.93s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  58779
END PROCESSING NOVEL --------------


PROGRESSION  5.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2001_Nothomb-Amelie_Cosmetique-de-l-ennemi


  6%|▌         | 11/200 [02:17<32:39, 10.37s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26584
END PROCESSING NOVEL --------------


PROGRESSION  6.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1963_Beauvoir-Simone-de_La-force-des-choses_1


  6%|▌         | 12/200 [02:40<43:52, 14.00s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  75405
END PROCESSING NOVEL --------------


PROGRESSION  6.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1974_San-Antonio_Mets-ton-doigt-ou-j-ai-mon-doigt


  6%|▋         | 13/200 [02:59<49:08, 15.77s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  72482
END PROCESSING NOVEL --------------


PROGRESSION  7.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Buet-Charles_Guy-Main-Rouge-legende-du-pays-de-Savoie


  7%|▋         | 14/200 [03:03<37:00, 11.94s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  21494
END PROCESSING NOVEL --------------


PROGRESSION  7.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1883_Greville-Henry_Angele


  8%|▊         | 15/200 [03:33<53:34, 17.38s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  97148
END PROCESSING NOVEL --------------


PROGRESSION  8.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1836_Musset-Alfred-de_La-Confession-d-un-enfant-du-siecle


  8%|▊         | 16/200 [04:13<1:14:48, 24.39s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  111233
END PROCESSING NOVEL --------------


PROGRESSION  8.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1846_Balzac-Honore-de_Le-Message


  8%|▊         | 17/200 [04:14<52:36, 17.25s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  5770
END PROCESSING NOVEL --------------


PROGRESSION  9.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1926_Leroux-Gaston_Le-coup-d-Etat-de-Cheri-Bibi


  9%|▉         | 18/200 [05:12<1:29:36, 29.54s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  137953
END PROCESSING NOVEL --------------


PROGRESSION  9.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1874_Verne-Jules_Le-Chancellor


 10%|▉         | 19/200 [05:29<1:17:53, 25.82s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  63964
END PROCESSING NOVEL --------------


PROGRESSION  10.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1992_Germain-Sylvie_La-pleurante-des-rues-de-Prague


 10%|█         | 20/200 [05:33<57:50, 19.28s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  24106
END PROCESSING NOVEL --------------


PROGRESSION  10.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1966_Perec-Georges_Quel-petit-velo-a-guidon-chrome-au-fond-de-la-cour-


 10%|█         | 21/200 [05:36<42:21, 14.20s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  15898
END PROCESSING NOVEL --------------


PROGRESSION  11.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1935_Veuzit-Max-du_Fille-de-prince


 11%|█         | 22/200 [06:05<55:31, 18.72s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  89191
END PROCESSING NOVEL --------------


PROGRESSION  11.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1864_Erckmann-Chatrian_Histoire-d-un-conscrit-de-1813


 12%|█▏        | 23/200 [06:27<58:03, 19.68s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  80653
END PROCESSING NOVEL --------------


PROGRESSION  12.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1912_Zevaco-Michel_Le-Rival-du-Roi


 12%|█▏        | 24/200 [07:25<1:31:34, 31.22s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  141030
END PROCESSING NOVEL --------------


PROGRESSION  12.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1850_Bassanville-Anais-de_Le-Soir-et-le-matin-de-la-vie-ou-Conseils-aux-jeunes-filles


 12%|█▎        | 25/200 [07:47<1:22:58, 28.45s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  77923
END PROCESSING NOVEL --------------


PROGRESSION  13.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1819_Barthelemy-Hadot-Marie-Adelaide_Laurence-de-Sully-ou-L-ermitage-en-Suisse_(Tome-4)


 13%|█▎        | 26/200 [08:01<1:10:13, 24.22s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  58109
END PROCESSING NOVEL --------------


PROGRESSION  13.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1892_Zola-Emile_La-debacle


 14%|█▎        | 27/200 [10:38<3:04:38, 64.04s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  234339
END PROCESSING NOVEL --------------


PROGRESSION  14.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1886_Mahalin-Paul_La-filleule-de-Lagardere_Tome-2-L-heritiere


 14%|█▍        | 28/200 [11:04<2:30:45, 52.59s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  84454
END PROCESSING NOVEL --------------


PROGRESSION  14.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1843_Sand-George_La-Comtesse-de-Rudolstadt


 14%|█▍        | 29/200 [13:57<4:12:49, 88.71s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  244685
END PROCESSING NOVEL --------------


PROGRESSION  15.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1899_Noir-Louis_Une-chasse-a-courre-au-Pole-nord


 15%|█▌        | 30/200 [14:02<3:00:20, 63.65s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26310
END PROCESSING NOVEL --------------


PROGRESSION  15.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1846_Reybaud-Louis_Jerome-Paturot-a-la-recherche-d-une-position-sociale_1


 16%|█▌        | 31/200 [14:26<2:25:43, 51.73s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  78130
END PROCESSING NOVEL --------------


PROGRESSION  16.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1923_Istrati-Panait_Les-recits-d-Adrien-Zograffi_I


 16%|█▌        | 32/200 [14:40<1:52:55, 40.33s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  56556
END PROCESSING NOVEL --------------


PROGRESSION  16.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2015_Germain-Sylvie_a-la-table-des-hommes


 16%|█▋        | 33/200 [14:57<1:32:37, 33.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62014
END PROCESSING NOVEL --------------


PROGRESSION  17.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1945_Calet-Henri_Le-Bouquet


 17%|█▋        | 34/200 [15:25<1:28:13, 31.89s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  87265
END PROCESSING NOVEL --------------


PROGRESSION  17.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1880_Assollant-Alfred_Hyacinthe


 18%|█▊        | 35/200 [15:43<1:16:18, 27.75s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  65762
END PROCESSING NOVEL --------------


PROGRESSION  18.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1866_Stern-Daniel_Nelida-Herve-Julien


 18%|█▊        | 36/200 [16:21<1:23:43, 30.63s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  105819
END PROCESSING NOVEL --------------


PROGRESSION  18.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1986_Tournier-Michel_La-Goutte-d-or


 18%|█▊        | 37/200 [16:41<1:15:02, 27.62s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  74324
END PROCESSING NOVEL --------------


PROGRESSION  19.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1881_Valles-Jules_Le-Bachelier


 19%|█▉        | 38/200 [17:36<1:36:17, 35.66s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  132668
END PROCESSING NOVEL --------------


PROGRESSION  19.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1866_Sand-George_Promenades-autour-d-un-village


 20%|█▉        | 39/200 [17:49<1:17:45, 28.98s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  51939
END PROCESSING NOVEL --------------


PROGRESSION  20.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1855_Sand-George_Melchior


 20%|██        | 40/200 [17:51<55:47, 20.92s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  13096
END PROCESSING NOVEL --------------


PROGRESSION  20.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1860_Assollant-Alfred_Histoire-fantastique-du-celebre-Pierrot


 20%|██        | 41/200 [17:57<43:19, 16.35s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  30775
END PROCESSING NOVEL --------------


PROGRESSION  21.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1868_Zaccone-Pierre_Le-Courrier-de-Lyon


 21%|██        | 42/200 [18:28<54:30, 20.70s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  89589
END PROCESSING NOVEL --------------


PROGRESSION  21.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1845_Balzac-Honore-de_Gambara


 22%|██▏       | 43/200 [18:33<41:59, 16.05s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26112
END PROCESSING NOVEL --------------


PROGRESSION  22.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2009_Simon-Claude_Archipel-et-Nord


 22%|██▏       | 44/200 [18:34<29:51, 11.48s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  4932
END PROCESSING NOVEL --------------


PROGRESSION  22.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1950_Fourre-Maurice_La-Nuit-du-Rose-Hotel


 22%|██▎       | 45/200 [18:57<38:46, 15.01s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  77211
END PROCESSING NOVEL --------------


PROGRESSION  23.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1859_Dumas-Alexandre_Le-fils-du-forçat


 23%|██▎       | 46/200 [19:19<43:36, 16.99s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  76052
END PROCESSING NOVEL --------------


PROGRESSION  23.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2008_Vargas-Fred_Un-lieu-incertain


 24%|██▎       | 47/200 [20:07<1:07:34, 26.50s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  125990
END PROCESSING NOVEL --------------


PROGRESSION  24.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2010_Houellebecq-Michel_La-Carte-et-le-territoire


 24%|██▍       | 48/200 [20:55<1:22:58, 32.75s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  116382
END PROCESSING NOVEL --------------


PROGRESSION  24.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1931_Simenon-Georges_Au-Rendez-vous-des-Terre-Neuvas


 24%|██▍       | 49/200 [21:05<1:05:26, 26.01s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  45650
END PROCESSING NOVEL --------------


PROGRESSION  25.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1854_Nerval-Gerard-de_Promenades-et-souvenirs


 25%|██▌       | 50/200 [21:07<46:50, 18.74s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  11404
END PROCESSING NOVEL --------------


PROGRESSION  25.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2000_Bouraoui-Nina_Garcon-Manque


 26%|██▌       | 51/200 [21:07<32:50, 13.22s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  2688
END PROCESSING NOVEL --------------


PROGRESSION  26.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1919_Leblanc-Maurice_L-ile-aux-Trente-cercueils


 26%|██▌       | 52/200 [21:51<55:22, 22.45s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  114963
END PROCESSING NOVEL --------------


PROGRESSION  26.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1906_Farrere-Claude_L-homme-qui-assassina


 26%|██▋       | 53/200 [22:10<52:16, 21.33s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  69878
END PROCESSING NOVEL --------------


PROGRESSION  27.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Renard-Jules_Coquecigrues


 27%|██▋       | 54/200 [22:19<42:40, 17.54s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  42789
END PROCESSING NOVEL --------------


PROGRESSION  27.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1957_San-Antonio_J-ai-peur-des-mouches


 28%|██▊       | 55/200 [22:28<36:25, 15.07s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  44599
END PROCESSING NOVEL --------------


PROGRESSION  28.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1984_Delly_Des-plaintes-dans-la-nuit


 28%|██▊       | 56/200 [22:55<44:56, 18.72s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  83287
END PROCESSING NOVEL --------------


PROGRESSION  28.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1922_Margueritte-Victor_La-garçonne


 28%|██▊       | 57/200 [23:21<49:34, 20.80s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  84470
END PROCESSING NOVEL --------------


PROGRESSION  29.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1919_Bazin-Rene_Les-Nouveaux-Oberle


 29%|██▉       | 58/200 [24:21<1:17:08, 32.60s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  137122
END PROCESSING NOVEL --------------


PROGRESSION  29.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1827_Stendhal_Armance


 30%|██▉       | 59/200 [24:44<1:10:06, 29.83s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  75607
END PROCESSING NOVEL --------------


PROGRESSION  30.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1922_Margueritte-Victor_Poum_(aventures-d-un-petit-garçon)


 30%|███       | 60/200 [24:52<54:09, 23.21s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40698
END PROCESSING NOVEL --------------


PROGRESSION  30.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2005_Houellebecq-Michel_La-possibilite-d-une-ile


 30%|███       | 61/200 [25:53<1:20:09, 34.60s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  142797
END PROCESSING NOVEL --------------


PROGRESSION  31.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2003_Daeninckx-Didier_Je-tue-il


 31%|███       | 62/200 [25:58<59:22, 25.81s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26847
END PROCESSING NOVEL --------------


PROGRESSION  31.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1947_Simenon-Georges_Maigret-et-l-inspecteur-Malgracieux


 32%|███▏      | 63/200 [26:13<51:18, 22.47s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62396
END PROCESSING NOVEL --------------


PROGRESSION  32.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1837_Sabatier-de-Castres-Camille_La-Colonie-chretienne_histoire-de-plusieurs-deportes-jetes-par-un-naufrage-dans-une-ile-deserte


 32%|███▏      | 64/200 [26:35<50:29, 22.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  76332
END PROCESSING NOVEL --------------


PROGRESSION  32.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1888_Guy-de-Maupassant_Pierre-et-Jean


 32%|███▎      | 65/200 [26:54<48:12, 21.43s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  66814
END PROCESSING NOVEL --------------


PROGRESSION  33.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1955_Robbe-Grillet-Alain_Le-Voyeur


 33%|███▎      | 66/200 [27:22<51:45, 23.18s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  85229
END PROCESSING NOVEL --------------


PROGRESSION  33.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1923_Leblanc-Maurice_Les-Huit-Coups-de-l-horloge


 34%|███▎      | 67/200 [27:46<52:13, 23.56s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  80897
END PROCESSING NOVEL --------------


PROGRESSION  34.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2004_Volodine-Antoine_Bardo-or-not-Bardo


 34%|███▍      | 68/200 [28:02<46:56, 21.33s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  65277
END PROCESSING NOVEL --------------


PROGRESSION  34.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1816_Bournon-Malarme-Charlotte-de_Lancelot-Montagu-ou-le-Resultat-des-bonnes-fortunes_Tome-2


 34%|███▍      | 69/200 [28:10<37:47, 17.31s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  42704
END PROCESSING NOVEL --------------


PROGRESSION  35.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1855_Nerval-Gerard-de_Aurelia


 35%|███▌      | 70/200 [28:14<28:33, 13.18s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  21726
END PROCESSING NOVEL --------------


PROGRESSION  35.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1892_Mendes-Catulle_Luscignole


 36%|███▌      | 71/200 [28:21<24:17, 11.30s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  35946
END PROCESSING NOVEL --------------


PROGRESSION  36.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1956_Thomas-Henri_La-nuit-de-Londres


 36%|███▌      | 72/200 [28:27<21:11,  9.93s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  37042
END PROCESSING NOVEL --------------


PROGRESSION  36.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1978_Perec-Georges_La-Vie-mode-d-emploi


 36%|███▋      | 73/200 [30:29<1:32:00, 43.47s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  203517
END PROCESSING NOVEL --------------


PROGRESSION  37.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2010_Riboulet-Mathieu_Avec-Bastien


 37%|███▋      | 74/200 [30:35<1:07:44, 32.26s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  29152
END PROCESSING NOVEL --------------


PROGRESSION  37.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1952_Rebatet-Lucien_Les-Deux-Etendards
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  601304


 38%|███▊      | 75/200 [48:05<11:43:08, 337.50s/it]

END PROCESSING NOVEL --------------


PROGRESSION  38.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1908_Zevaco-Michel_La-Fausta
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  271384


 38%|███▊      | 76/200 [51:15<10:06:11, 293.32s/it]

END PROCESSING NOVEL --------------


PROGRESSION  38.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1937_Bernanos-Georges_Nouvelle-histoire-de-Mouchette


 38%|███▊      | 77/200 [51:25<7:06:41, 208.15s/it] 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  39285
END PROCESSING NOVEL --------------


PROGRESSION  39.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1887_Belot-Adolphe_Alphonsine


 39%|███▉      | 78/200 [51:41<5:06:24, 150.69s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62725
END PROCESSING NOVEL --------------


PROGRESSION  39.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1881_Lemonnier-Camille_Les-charniers


 40%|███▉      | 79/200 [51:54<3:40:12, 109.20s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  47154
END PROCESSING NOVEL --------------


PROGRESSION  40.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1842_Balzac-Honore-de_La-Vendetta


 40%|████      | 80/200 [51:55<2:33:26, 76.72s/it] 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  6033
END PROCESSING NOVEL --------------


PROGRESSION  40.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Zola-Emile_Le-docteur-Pascal


 40%|████      | 81/200 [52:58<2:24:17, 72.75s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  141771
END PROCESSING NOVEL --------------


PROGRESSION  41.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1961_Delly_Le-roi-des-Andes


 41%|████      | 82/200 [53:27<1:57:17, 59.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  90609
END PROCESSING NOVEL --------------


PROGRESSION  41.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1970_Deon-Michel_Les-poneys-sauvages
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  200658


 42%|████▏     | 83/200 [55:22<2:28:32, 76.17s/it]

END PROCESSING NOVEL --------------


PROGRESSION  42.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1943_Simenon-Georges_Les-dossiers-de-l-Agence-O


 42%|████▏     | 84/200 [57:00<2:39:46, 82.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  189442
END PROCESSING NOVEL --------------


PROGRESSION  42.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1958_San-Antonio_La-verite-en-salade


 42%|████▎     | 85/200 [57:08<1:55:45, 60.39s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  41880
END PROCESSING NOVEL --------------


PROGRESSION  43.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1948_Bealu-Marcel_L-Araignee-d-eau


 43%|████▎     | 86/200 [57:15<1:24:12, 44.32s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  36956
END PROCESSING NOVEL --------------


PROGRESSION  43.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1882_Albalat-Antoine_L-inassouvie-roman-intime


 44%|████▎     | 87/200 [57:41<1:13:13, 38.88s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  82857
END PROCESSING NOVEL --------------


PROGRESSION  44.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1973_Simon-Claude_Triptyque


 44%|████▍     | 88/200 [58:00<1:01:21, 32.87s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  68496
END PROCESSING NOVEL --------------


PROGRESSION  44.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1869_Ponson-du-Terrail-Pierre_Le-Forgeron-de-la-Cour-Dieu_Tome-I


 44%|████▍     | 89/200 [58:38<1:03:44, 34.46s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  106464
END PROCESSING NOVEL --------------


PROGRESSION  45.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1973_San-Antonio_J-ai-essaye-on-peut


 45%|████▌     | 90/200 [59:04<58:17, 31.80s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  84070
END PROCESSING NOVEL --------------


PROGRESSION  45.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1878_Boisgobey-Fortune-du_Le-crime-de-l-Opera_1


 46%|████▌     | 91/200 [1:00:05<1:13:44, 40.59s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  144605
END PROCESSING NOVEL --------------


PROGRESSION  46.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1878_Malot-Hector_Sans-famille


 46%|████▌     | 92/200 [1:01:34<1:39:30, 55.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  148601
END PROCESSING NOVEL --------------


PROGRESSION  46.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1877_Berthet-Elie_L-Incendiaire


 46%|████▋     | 93/200 [1:02:44<1:46:10, 59.54s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  117665
END PROCESSING NOVEL --------------


PROGRESSION  47.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1853_Zaccone-Pierre_Eric-le-mendiant


 47%|████▋     | 94/200 [1:02:51<1:17:22, 43.80s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  27616
END PROCESSING NOVEL --------------


PROGRESSION  47.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1850_Dumas-Alexandre_La-femme-au-collier-de-velours


 48%|████▊     | 95/200 [1:03:21<1:09:35, 39.76s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  72356
END PROCESSING NOVEL --------------


PROGRESSION  48.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1884_Gouraud-Julie_La-petite-maîtresse-de-maison
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  26
END PROCESSING NOVEL --------------


PROGRESSION  48.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1910_Delly_Esclave-ou-reine


 48%|████▊     | 97/200 [1:03:38<43:31, 25.35s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  45784
END PROCESSING NOVEL --------------


PROGRESSION  49.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1880_Marechal-Marie_Un-mariage-a-l-etranger


 49%|████▉     | 98/200 [1:04:40<58:29, 34.41s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  107882
END PROCESSING NOVEL --------------


PROGRESSION  49.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1879_Sauniere-Paul_Le-Legs-du-pendu


 50%|████▉     | 99/200 [1:05:50<1:13:25, 43.62s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  116491
END PROCESSING NOVEL --------------


PROGRESSION  50.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1989_Perec-Georges_L-Infra-ordinaire


 50%|█████     | 100/200 [1:05:56<55:42, 33.42s/it] 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  21750
END PROCESSING NOVEL --------------


PROGRESSION  50.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1920_Mauriac-François_La-chair-et-le-sang


 50%|█████     | 101/200 [1:06:19<50:10, 30.41s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  57428
END PROCESSING NOVEL --------------


PROGRESSION  51.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Bazin-Rene_Madame-Corentine


 51%|█████     | 102/200 [1:06:44<47:23, 29.02s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  64546
END PROCESSING NOVEL --------------


PROGRESSION  51.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2013_Daeninckx-Didier_Tetes-de-maures


 52%|█████▏    | 103/200 [1:07:11<45:55, 28.41s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  64368
END PROCESSING NOVEL --------------


PROGRESSION  52.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1858_Veuillot-Louis_L-Honnete-femme


 52%|█████▏    | 104/200 [1:08:19<1:03:56, 39.96s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  114032
END PROCESSING NOVEL --------------


PROGRESSION  52.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1823_Duras-Claire-de-Durfort_Ourika


 52%|█████▎    | 105/200 [1:08:22<45:54, 28.99s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  12881
END PROCESSING NOVEL --------------


PROGRESSION  53.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1897_Gyp_Totote


 53%|█████▎    | 106/200 [1:08:43<42:00, 26.81s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  57172
END PROCESSING NOVEL --------------


PROGRESSION  53.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2014_Rolin-Jean_Les-evenements


 54%|█████▎    | 107/200 [1:08:57<35:28, 22.89s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40546
END PROCESSING NOVEL --------------


PROGRESSION  54.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2009_Carrere-Emmanuel_D-autres-vies-que-la-mienne


 54%|█████▍    | 108/200 [1:09:48<48:06, 31.38s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  96077
END PROCESSING NOVEL --------------


PROGRESSION  54.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2007_Chandernagor-Françoise_La-voyageuse-de-nuit


 55%|█████▍    | 109/200 [1:11:03<1:07:03, 44.21s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  120088
END PROCESSING NOVEL --------------


PROGRESSION  55.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1956_San-Antonio_Fais-gaffe-a-tes-os


 55%|█████▌    | 110/200 [1:11:20<54:24, 36.27s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  49178
END PROCESSING NOVEL --------------


PROGRESSION  55.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1856_Feval-Paul_Madame-Gil-Blas-souvenirs-et-aventures-d-une-femme-de-notre-temps_(Volume-4)


 56%|█████▌    | 111/200 [1:11:33<43:27, 29.29s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40915
END PROCESSING NOVEL --------------


PROGRESSION  56.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1921_Leroux-Gaston_Palas-et-Cheri-Bibi


 56%|█████▌    | 112/200 [1:12:11<46:45, 31.88s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  81901
END PROCESSING NOVEL --------------


PROGRESSION  56.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1931_Simenon-Georges_Monsieur-Gallet-decede


 56%|█████▋    | 113/200 [1:12:29<40:09, 27.69s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  52103
END PROCESSING NOVEL --------------


PROGRESSION  57.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1897_Ivoi-Paul-d-_Cousin-de-Lavarede-


 57%|█████▋    | 114/200 [1:14:27<1:18:17, 54.62s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  158155
END PROCESSING NOVEL --------------


PROGRESSION  57.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1876_Husson-Mme-de_La-Fille-du-rabbin


 57%|█████▊    | 115/200 [1:14:39<59:36, 42.07s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  40926
END PROCESSING NOVEL --------------


PROGRESSION  58.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1873_Daudet-Alphonse_Le-Cabecilla


 58%|█████▊    | 116/200 [1:14:40<41:21, 29.54s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  1562
END PROCESSING NOVEL --------------


PROGRESSION  58.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1876_Monnet-Francisque_Une-histoire-au-dessus-du-crocodile


 58%|█████▊    | 117/200 [1:15:18<44:26, 32.13s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  81158
END PROCESSING NOVEL --------------


PROGRESSION  59.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1842_Balzac-Honore-de_Le-Contrat-de-mariage


 59%|█████▉    | 118/200 [1:15:40<39:41, 29.05s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  56443
END PROCESSING NOVEL --------------


PROGRESSION  59.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1966_Delly_Une-mesalliance


 60%|█████▉    | 119/200 [1:16:03<37:00, 27.41s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  57080
END PROCESSING NOVEL --------------


PROGRESSION  60.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1845_Karr-Alphonse_Voyage-autour-de-mon-jardin


 60%|██████    | 120/200 [9:27:18<196:54:39, 8861.00s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  124937
END PROCESSING NOVEL --------------


PROGRESSION  60.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1883_Cherbuliez-Victor_La-ferme-du-Choquard


 60%|██████    | 121/200 [9:28:27<136:34:19, 6223.53s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  152319
END PROCESSING NOVEL --------------


PROGRESSION  61.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1859_Feval-Paul_La-Maison-de-Pilate
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  188710


 61%|██████    | 122/200 [9:30:05<95:01:25, 4385.71s/it] 

END PROCESSING NOVEL --------------


PROGRESSION  61.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1950_Duras-Marguerite_Un-barrage-contre-le-Pacifique


 62%|██████▏   | 123/200 [9:30:41<65:53:49, 3080.91s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  103404
END PROCESSING NOVEL --------------


PROGRESSION  62.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1874_Feval-Paul_La-Bande-Cadet-Les-Habits-Noirs_Tome-VIII


 62%|██████▏   | 124/200 [9:31:44<45:55:53, 2175.70s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  147439
END PROCESSING NOVEL --------------


PROGRESSION  62.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1948_Barjavel-Rene_Le-diable-l-emporte


 62%|██████▎   | 125/200 [9:32:09<31:52:59, 1530.39s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  86215
END PROCESSING NOVEL --------------


PROGRESSION  63.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1987_Green-Julien_Les-Pays-lointains
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  398797


 63%|██████▎   | 126/200 [9:39:15<24:38:47, 1199.02s/it]

END PROCESSING NOVEL --------------


PROGRESSION  63.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1898_Lesueur-Daniel_Levres-closes


 64%|██████▎   | 127/200 [9:39:25<17:04:51, 842.35s/it] 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  39832
END PROCESSING NOVEL --------------


PROGRESSION  64.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2001_Echenoz-Jean_Jerome-Lindon


 64%|██████▍   | 128/200 [9:39:26<11:48:07, 590.10s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  9927
END PROCESSING NOVEL --------------


PROGRESSION  64.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1863_Capendu-Ernest_Marcof-Le-Malouin


 64%|██████▍   | 129/200 [9:40:18<8:27:12, 428.62s/it] 

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  133147
END PROCESSING NOVEL --------------


PROGRESSION  65.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1869_Goncourt-Edmond-et-Jules-de_Madame-Gervaisais


 65%|██████▌   | 130/200 [9:40:38<5:57:02, 306.03s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  72732
END PROCESSING NOVEL --------------


PROGRESSION  65.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1890_Loti-Pierre_Le-roman-d-un-enfant


 66%|██████▌   | 131/200 [9:40:55<4:12:09, 219.26s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62626
END PROCESSING NOVEL --------------


PROGRESSION  66.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1858_Feval-Paul_La-fabrique-de-mariages_Vol_I


 66%|██████▌   | 132/200 [9:41:06<2:57:46, 156.86s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  50339
END PROCESSING NOVEL --------------


PROGRESSION  66.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1976_San-Antonio_Sucette-boulevard


 66%|██████▋   | 133/200 [9:41:29<2:10:13, 116.63s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  73375
END PROCESSING NOVEL --------------


PROGRESSION  67.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1865_Robert-Clemence_Le-Baron-de-Trenck
PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  59602


 67%|██████▋   | 134/200 [9:41:47<1:35:35, 86.91s/it] 

END PROCESSING NOVEL --------------


PROGRESSION  67.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1842_Balzac-Honore-de_Une-double-famille


 68%|██████▊   | 135/200 [9:41:53<1:07:51, 62.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  30436
END PROCESSING NOVEL --------------


PROGRESSION  68.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1869_Hugo-Victor_L'homme-qui-rit


 68%|██████▊   | 136/200 [9:42:10<52:23, 49.12s/it]  

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62161
END PROCESSING NOVEL --------------


PROGRESSION  68.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1999_Adam-Olivier_Je-vais-bien-ne-t-en-fais-pas


 68%|██████▊   | 137/200 [9:42:16<37:52, 36.07s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  32237
END PROCESSING NOVEL --------------


PROGRESSION  69.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1867_Assollant-Alfred_Aventures-merveilleuses-mais-authentiques-du-capitaine-Corcoran-Premiere-Partie


 69%|██████▉   | 138/200 [9:42:27<29:26, 28.49s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  51199
END PROCESSING NOVEL --------------


PROGRESSION  69.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1848_Sand-George_François-le-champi


 70%|██████▉   | 139/200 [9:42:42<24:55, 24.51s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  58009
END PROCESSING NOVEL --------------


PROGRESSION  70.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1933_Queneau-Raymond_Le-Chiendent


 70%|███████   | 140/200 [9:43:28<30:53, 30.90s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  118326
END PROCESSING NOVEL --------------


PROGRESSION  70.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2014_Mathieu-Nicolas_Aux-animaux-la-guerre


 70%|███████   | 141/200 [9:44:24<37:55, 38.56s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  125365
END PROCESSING NOVEL --------------


PROGRESSION  71.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1893_Courteline-Georges_Messieurs-les-ronds-de-cuir


 71%|███████   | 142/200 [9:44:33<28:49, 29.83s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  45080
END PROCESSING NOVEL --------------


PROGRESSION  71.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1846_Balzac-Honore-de_Autre-etude-de-femme


 72%|███████▏  | 143/200 [9:44:37<20:45, 21.85s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  20241
END PROCESSING NOVEL --------------


PROGRESSION  72.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1862_Chevalier-H-Emile_Les-Nez-Perces


 72%|███████▏  | 144/200 [9:44:56<19:40, 21.08s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  73565
END PROCESSING NOVEL --------------


PROGRESSION  72.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1908_Barbusse-Henri_L-enfer


 72%|███████▎  | 145/200 [9:45:27<22:05, 24.10s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  93999
END PROCESSING NOVEL --------------


PROGRESSION  73.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1920_Leroux-Gaston_Aventures-effroyables-de-M-Herbert-de-Renich_Tome-I-Le-Capitaine-Hyx


 73%|███████▎  | 146/200 [9:46:02<24:43, 27.47s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  99123
END PROCESSING NOVEL --------------


PROGRESSION  73.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1878_La-Blanchere-Henri-de_Les-derniers-Peaux-Rouges-Le-tresor-de-Montcalm


 74%|███████▎  | 147/200 [9:46:10<19:03, 21.58s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  36690
END PROCESSING NOVEL --------------


PROGRESSION  74.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1975_San-Antonio_Certaines-l-aiment-chauve


 74%|███████▍  | 148/200 [9:46:30<18:06, 20.90s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  67596
END PROCESSING NOVEL --------------


PROGRESSION  74.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1901_Mael-Pierre_Un-mousse-de-Surcouf


 74%|███████▍  | 149/200 [9:46:48<17:14, 20.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  73592
END PROCESSING NOVEL --------------


PROGRESSION  75.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1934_Montherlant-Henry-de_Les-celibataires


 75%|███████▌  | 150/200 [9:47:13<17:55, 21.51s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  85747
END PROCESSING NOVEL --------------


PROGRESSION  75.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1839_Dumas-Alexandre_Aventures-de-Lyderic


 76%|███████▌  | 151/200 [9:47:18<13:37, 16.68s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  29747
END PROCESSING NOVEL --------------


PROGRESSION  76.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1931_Leblanc-Maurice_La-Barre-y-va


 76%|███████▌  | 152/200 [9:47:35<13:22, 16.72s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  65022
END PROCESSING NOVEL --------------


PROGRESSION  76.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1900_Le-Roy-Eugene_Jacquou-Le-Croquant


 76%|███████▋  | 153/200 [9:48:33<22:50, 29.15s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  132561
END PROCESSING NOVEL --------------


PROGRESSION  77.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1942_Veuzit-Max-du_Moineau-en-cage


 77%|███████▋  | 154/200 [9:49:01<22:02, 28.75s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  83066
END PROCESSING NOVEL --------------


PROGRESSION  77.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1913_Proust-Marcel_Du-cote-de-chez-Swann-A-la-recherche-du-temps-perdu


 78%|███████▊  | 155/200 [9:51:02<42:24, 56.55s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  202562
END PROCESSING NOVEL --------------


PROGRESSION  78.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1864_Ribelle-Charles-de_Les-Confidences-de-Gribouille


 78%|███████▊  | 156/200 [9:51:11<31:00, 42.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  42705
END PROCESSING NOVEL --------------


PROGRESSION  78.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1849_Sue-Eugene_Les-Mysteres-du-peuple_Tome-II


 78%|███████▊  | 157/200 [9:51:48<29:03, 40.56s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  108821
END PROCESSING NOVEL --------------


PROGRESSION  79.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1888_Boisgobey-Fortune-du_L-Oeil-de-chat_Tome1


 79%|███████▉  | 158/200 [9:52:21<26:44, 38.19s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  99309
END PROCESSING NOVEL --------------


PROGRESSION  79.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2014_Echenoz-Jean_Caprice-de-la-reine


 80%|███████▉  | 159/200 [9:52:24<18:59, 27.79s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  22011
END PROCESSING NOVEL --------------


PROGRESSION  80.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1926_Kessel-Joseph_Les-Captifs


 80%|████████  | 160/200 [9:52:39<15:53, 23.83s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  60236
END PROCESSING NOVEL --------------


PROGRESSION  80.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1971_Dhotel-Andre_L-honorable-Monsieur-Jacques


 80%|████████  | 161/200 [9:53:15<17:57, 27.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  102885
END PROCESSING NOVEL --------------


PROGRESSION  81.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1898_Dondel-Du-Faouedic-Noemie_Voyages-loin-de-ma-chambre_tome-2


 81%|████████  | 162/200 [9:53:45<17:49, 28.15s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  88145
END PROCESSING NOVEL --------------


PROGRESSION  81.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1874_Fleuriot-Zenaide_En-conge


 82%|████████▏ | 163/200 [9:53:53<13:48, 22.38s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  44718
END PROCESSING NOVEL --------------


PROGRESSION  82.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1917_Bernede-Arthur_Judex


 82%|████████▏ | 164/200 [9:55:57<31:36, 52.67s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  149168
END PROCESSING NOVEL --------------


PROGRESSION  82.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1926_Bourget-Paul_Le-danseur-mondain


 82%|████████▎ | 165/200 [9:56:10<23:50, 40.86s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  56461
END PROCESSING NOVEL --------------


PROGRESSION  83.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1952_Calet-Henri_Un-Grand-Voyage


 83%|████████▎ | 166/200 [9:56:24<18:29, 32.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  58965
END PROCESSING NOVEL --------------


PROGRESSION  83.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1894_Cim-Albert_Histoire-d-un-baiser


 84%|████████▎ | 167/200 [9:56:32<13:56, 25.35s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  43272
END PROCESSING NOVEL --------------


PROGRESSION  84.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1883_Merouvel-Charles_Angele-Meraud


 84%|████████▍ | 168/200 [9:57:13<16:02, 30.06s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  118311
END PROCESSING NOVEL --------------


PROGRESSION  84.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1866_Stapleaux-Leopold_Le-Chateau-de-la-rage.


 84%|████████▍ | 169/200 [9:58:23<21:39, 41.90s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  157357
END PROCESSING NOVEL --------------


PROGRESSION  85.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2004_Grimbert-Philippe_Un-secret


 85%|████████▌ | 170/200 [9:58:28<15:26, 30.89s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  29654
END PROCESSING NOVEL --------------


PROGRESSION  85.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1884_Bourges-Elemir_Le-Crepuscule-des-Dieux


 86%|████████▌ | 171/200 [9:58:49<13:29, 27.92s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  74304
END PROCESSING NOVEL --------------


PROGRESSION  86.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1958_Delly_Lysis


 86%|████████▌ | 172/200 [9:58:58<10:25, 22.34s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  42621
END PROCESSING NOVEL --------------


PROGRESSION  86.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1977_Gary-Romain_Charge-d-ame


 86%|████████▋ | 173/200 [9:59:28<11:06, 24.70s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  90842
END PROCESSING NOVEL --------------


PROGRESSION  87.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1848_Woillez-Catherine_Edma-et-Marguerite-ou-les-Ruines-de-Chatillon-d-Azergues


 87%|████████▋ | 174/200 [9:59:45<09:36, 22.19s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  62561
END PROCESSING NOVEL --------------


PROGRESSION  87.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1993_Rolin-Olivier_L-Invention-du-monde


 88%|████████▊ | 175/200 [10:02:19<25:42, 61.71s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  232259
END PROCESSING NOVEL --------------


PROGRESSION  88.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1892_Lesueur-Daniel_Passion-slave


 88%|████████▊ | 176/200 [10:02:45<20:26, 51.11s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  83968
END PROCESSING NOVEL --------------


PROGRESSION  88.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1959_Troyat-Henri_Les-compagnons-du-coquelicot


 88%|████████▊ | 177/200 [10:03:31<19:01, 49.64s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  118581
END PROCESSING NOVEL --------------


PROGRESSION  89.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1857_Ponson-du-Terrail-Pierre_L-Heritage-Mysterieux


 89%|████████▉ | 178/200 [10:05:52<28:14, 77.03s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  232810
END PROCESSING NOVEL --------------


PROGRESSION  89.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
2012_Nothomb-Amelie_Barbe-bleue


 90%|████████▉ | 179/200 [10:05:58<19:27, 55.61s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  30747
END PROCESSING NOVEL --------------


PROGRESSION  90.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1875_Marmette-Joseph_La-fiancee-du-rebelle


 90%|█████████ | 180/200 [10:06:20<15:14, 45.72s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  78576
END PROCESSING NOVEL --------------


PROGRESSION  90.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1984_Daeninckx-Didier_Le-der-des-ders


 90%|█████████ | 181/200 [10:06:40<12:00, 37.90s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  70482
END PROCESSING NOVEL --------------


PROGRESSION  91.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1884_Chavette-Eugene_La-conquete-d-une-cuisiniere_II-Le-tombeur-des-cranes


 91%|█████████ | 182/200 [10:07:05<10:14, 34.16s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  83522
END PROCESSING NOVEL --------------


PROGRESSION  91.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1876_Zola-Emile_Son-excellence-Eugene-Rougon


 92%|█████████▏| 183/200 [10:08:17<12:53, 45.49s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  157958
END PROCESSING NOVEL --------------


PROGRESSION  92.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1958_Very-Pierre_Les-disparus-de-Saint-Agil


 92%|█████████▏| 184/200 [10:08:39<10:12, 38.28s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  75868
END PROCESSING NOVEL --------------


PROGRESSION  92.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1908_Delly_Une-femme-superieure


 92%|█████████▎| 185/200 [10:08:58<08:07, 32.47s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  70141
END PROCESSING NOVEL --------------


PROGRESSION  93.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1883_Pontmartin-Armand-de_Les-Corbeaux-du-Gevaudan


 93%|█████████▎| 186/200 [10:09:31<07:39, 32.85s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  99950
END PROCESSING NOVEL --------------


PROGRESSION  93.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1859_Grangier-Mathilde_Amour-et-devoir


 94%|█████████▎| 187/200 [10:09:56<06:34, 30.32s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  79962
END PROCESSING NOVEL --------------


PROGRESSION  94.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1905_Farrere-Claude_Les-civilises


 94%|█████████▍| 188/200 [10:10:21<05:45, 28.80s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  75053
END PROCESSING NOVEL --------------


PROGRESSION  94.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1930_Galopin-Arnould_Le-sergent-Bucaille


 94%|█████████▍| 189/200 [10:10:53<05:25, 29.62s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  73609
END PROCESSING NOVEL --------------


PROGRESSION  95.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1959_Queneau-Raymond_Zazie-Dans-Le-Metro


 95%|█████████▌| 190/200 [10:11:14<04:30, 27.05s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  57877
END PROCESSING NOVEL --------------


PROGRESSION  95.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1865_Feval-Paul_Les-habits-noirs_Tome-II


 96%|█████████▌| 191/200 [10:14:23<11:21, 75.76s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  208898
END PROCESSING NOVEL --------------


PROGRESSION  96.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1994_Chevillard-Eric_Prehistoire


 96%|█████████▌| 192/200 [10:14:36<07:35, 56.92s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  39329
END PROCESSING NOVEL --------------


PROGRESSION  96.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1847_Sandeau-Jules_Mademoiselle-de-la-Seigliere_Volume-1


 96%|█████████▋| 193/200 [10:14:45<04:58, 42.63s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  32927
END PROCESSING NOVEL --------------


PROGRESSION  97.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1951_Simenon-Georges_Maigret-et-la-grande-perche


 97%|█████████▋| 194/200 [10:15:03<03:31, 35.20s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  52328
END PROCESSING NOVEL --------------


PROGRESSION  97.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1867_Houssaye-Arsene_La-Pantoufle-de-Cendrillon-ou-Suzanne-aux-coquelicots


 98%|█████████▊| 195/200 [10:15:05<02:06, 25.21s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  9771
END PROCESSING NOVEL --------------


PROGRESSION  98.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1985_Millet-Richard_Sept-passions-singulieres


 98%|█████████▊| 196/200 [10:15:21<01:29, 22.37s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  47030
END PROCESSING NOVEL --------------


PROGRESSION  98.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1973_Rolin-Dominique_Lettre-au-vieil-homme


 98%|█████████▊| 197/200 [10:16:16<01:36, 32.15s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  101093
END PROCESSING NOVEL --------------


PROGRESSION  99.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1971_Simenon-Georges_Maigret-et-l-homme-tout-seul


 99%|█████████▉| 198/200 [10:16:34<00:56, 28.07s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  52504
END PROCESSING NOVEL --------------


PROGRESSION  99.5 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1897_Lesueur-Daniel_Le-mariage-de-Gabrielle


100%|█████████▉| 199/200 [10:17:00<00:27, 27.36s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  63769
END PROCESSING NOVEL --------------


PROGRESSION  100.0 % COMPLETED



BEGIN PROCESSING NOVEL-----------
1882_Riviere-Henri_Le-Combat-de-la-vie-Mme-Naper


100%|██████████| 200/200 [10:18:00<00:00, 185.40s/it]

PIPELINE SPACY ----------- OK
NOMBRE TOKENS =  106435
END PROCESSING NOVEL --------------


PROGRESSION  100.5 % COMPLETED


 GET LISTS RESULTS -----------






 NOMBRE TOTAL TOKENS =  16412278

 NOMBRE TOTAL SENTENCES =  1022513

 RETURN LISTS RESULTS -----------


 END PROCESSING CORPUS --------------


Le temps total d'execution en secondes est de :  7727.434728583001


In [69]:
list_motif_result

['DET',
 'PRON',
 'ADP',
 'ADV',
 'CCONJ',
 'SCONJ',
 'VERB',
 'AUX',
 'à',
 'ADJ',
 'éter',
 '-',
 'NOUN',
 'faire',
 'bien',
 '–',
 'même',
 'voir',
 'où',
 'être',
 'avoir',
 'vouloir',
 'il',
 'venir',
 'être',
 'petit',
 'homme',
 'savoir',
 'grand',
 'prendre',
 'là',
 'jour',
 'aller',
 'rien',
 'femme',
 'dire',
 'main',
 'jeune',
 'bon',
 'heure',
 'mettre',
 'été',
 'falloir',
 'temps',
 'non',
 'oeil',
 'après',
 'jamais',
 'fois',
 'croire',
 'ça',
 'pouvoir',
 'chose',
 'étaier',
 'fille',
 'passer',
 'tête',
 'devoir',
 'entendre',
 'coup',
 'porte',
 'trouver',
 '-t',
 'moment',
 'vie',
 'demander',
 'enfant',
 'lui',
 'mère',
 '-ce',
 'père',
 'oui',
 'voix',
 'donner',
 'trop',
 'point',
 '—',
 'monsieur',
 'vieux',
 'était',
 'attendre',
 'monde',
 'Monsieur',
 'air',
 'mot',
 'cœur',
 'déjà',
 'maison',
 'm.',
 'nuit',
 'vous',
 'jusqu’',
 'mort',
 'ici',
 'regarder',
 'sortir',
 'an',
 'long',
 'reprendre',
 'fort',
 'PROPN',
 'contre',
 'devenir',


In [70]:
list_bigram_motif_result

['ADP_DET',
 'PRON_PRON',
 'PRON_AUX',
 'SCONJ_PRON',
 'ADP_PRON',
 'PRON_VERB',
 'PRON_ADV',
 'à_DET',
 'CCONJ_PRON',
 'ADV_PRON',
 'CCONJ_DET',
 'ADV_ADP',
 'SCONJ_DET',
 'ADV_DET',
 'CCONJ_ADP',
 'VERB_DET',
 'PRON_DET',
 'PRON_éter',
 'ADV_VERB',
 'VERB_ADV',
 'PRON_ADP',
 'ADJ_DET',
 'ADV_ADV',
 'ADP_ADV',
 'AUX_ADV',
 'DET_ADJ',
 'ADV_AUX',
 'à_PRON',
 'DET_ADV',
 'VERB_ADP',
 'AUX_DET',
 'AUX_VERB',
 'ADV_SCONJ',
 'VERB_PRON',
 'PRON_faire',
 'DET_NOUN',
 'CCONJ_ADV',
 'CCONJ_SCONJ',
 'ADP_ADJ',
 'ADV_à',
 'PRON_avoir',
 'DET_petit',
 'ADP_ADP',
 'PRON_voir',
 'DET_main',
 'PRON_être',
 'où_PRON',
 'ADJ_PRON',
 'ADP_NOUN',
 'DET_homme',
 'DET_PRON',
 'VERB_SCONJ',
 'NOUN_ADP',
 'PRON_vouloir',
 'ADP_SCONJ',
 'SCONJ_ADP',
 'DET_grand',
 'faire_DET',
 'éter_DET',
 'PRON_CCONJ',
 'éter_ADV',
 'DET_femme',
 'DET_jeune',
 'PRON_falloir',
 'DET_oeil',
 'AUX_ADP',
 'DET_jour',
 'PRON_dire',
 'venir_ADP',
 'VERB_-',
 'DET_tête',
 'ADV_CCONJ',
 'DET_vie',
 'PRON_savoir',
 'AUX_é

In [71]:
list_trigram_motif_result

['PRON_PRON_PRON',
 'PRON_PRON_AUX',
 'PRON_PRON_VERB',
 'SCONJ_PRON_PRON',
 'PRON_ADV_PRON',
 'ADP_PRON_PRON',
 'PRON_VERB_DET',
 'CCONJ_PRON_PRON',
 'SCONJ_PRON_ADV',
 'PRON_ADV_VERB',
 'CCONJ_ADP_DET',
 'ADV_ADP_DET',
 'PRON_AUX_VERB',
 'ADV_VERB_ADV',
 'PRON_AUX_DET',
 'PRON_ADV_AUX',
 'SCONJ_PRON_AUX',
 'PRON_VERB_ADV',
 'ADV_AUX_ADV',
 'PRON_VERB_ADP',
 'PRON_ADP_DET',
 'ADV_PRON_PRON',
 'CCONJ_PRON_ADV',
 'ADV_SCONJ_PRON',
 'PRON_AUX_ADV',
 'CCONJ_SCONJ_PRON',
 'ADV_PRON_VERB',
 'ADV_PRON_AUX',
 'ADP_PRON_ADP',
 'ADP_PRON_DET',
 'PRON_VERB_PRON',
 'PRON_PRON_ADV',
 'SCONJ_PRON_VERB',
 'ADP_ADJ_DET',
 'VERB_ADP_DET',
 'CCONJ_PRON_AUX',
 'VERB_-_être',
 'VERB_ADV_DET',
 'PRON_éter_DET',
 'ADP_SCONJ_PRON',
 'ADP_DET_ADJ',
 'DET_NOUN_ADP',
 'VERB_SCONJ_PRON',
 'PRON_PRON_éter',
 'ADP_DET_NOUN',
 'AUX_VERB_DET',
 'PRON_VERB_SCONJ',
 'CCONJ_PRON_VERB',
 'ADV_SCONJ_DET',
 'ADV_ADP_PRON',
 'ADV_à_DET',
 'PRON_AUX_ADP',
 'ADP_ADV_ADP',
 'ADP_PRON_CCONJ',
 'ADV_éter_ADV',
 'ADJ_PRON_

In [72]:
#1892_Zola-Emile_La-debacle - 13.5 % COMPLETED

In [None]:
import os
import shutil

In [None]:
src = r"corpus_main_txt/"
dest = r"corpus_temp/"

In [None]:
def get_200_novels(src, dest):
    i = 0
    list_dir_200 = os.listdir(src)[:200]
    for file in list_dir_200:
        source = src + file
        destination = dest + file
        # copy only files
        if os.path.isfile(source):
            shutil.copy(source, destination)

In [None]:
get_200_novels(src, dest)