# Diving into French Presidential Discourses
## Anne Hidalgo (AH)

In [None]:
!pip install minet
!pip install pdfminer.six
!pip install unidecode
!python -m spacy download fr_core_news_sm

In [1]:
import pandas as pd
from pdfminer.high_level import extract_text
from gensim.test.utils import datapath
from gensim import utils
import spacy
from collections import Counter
import unidecode
import gensim.models

In [2]:
## create a .csv of AH's Twitter activity since 27 Jan 2022
!minet tw scrape tweets "(from:Anne_Hidalgo) until:2022-03-27 since:2022-01-27" > tweets_AH.csv

## convert the .csv file in a data frame using pandas
df_tw_AH = pd.read_csv("./tweets_AH.csv")

## create a list of tweets selecting the 'text' column of the data frame
list_tw_AH = df_tw_AH['text'].values.tolist()
len(list_tw_AH)

Searching for "(from:Anne_Hidalgo) until:2022-03-27 since:2022-01-27"
Collecting tweet: 632 tweets [00:28, 22.28 tweets/s, queries=1, tokens=1]


632

In [3]:
## retrieve AH's affiliates1 Twitter activity
!minet tw scrape tweets "(from:faureolivier OR from:Johanna_Rolland OR from:BorisVallaud OR from:Valerie_Rabault OR from:PatrickKanner OR from:RachidTemal OR from:RemiFeraud OR from:PJouvet OR from:SebVincini OR from:GabrielleSiry OR from:algrain_paris10 OR from:ACORDEBARD OR from:RemiFeraud OR from:PotierDominique) until:2022-03-27 since:2022-01-27" > tw_AH_aff_all.csv

## convert AH's affiliates' tweets in a list
df_tw_AH_aff_all = pd.read_csv("tw_AH_aff_all.csv")
list_tw_AH_aff_all = df_tw_AH_aff_all['text'].values.tolist()
print(list_tw_AH_aff_all[0])
print(len(list_tw_AH_aff_all))

Searching for "(from:faureolivier OR from:Johanna_Rolland OR from:BorisVallaud OR from:Valerie_Rabault OR from:PatrickKanner OR from:RachidTemal OR from:RemiFeraud OR from:PJouvet OR from:SebVincini OR from:GabrielleSiry OR from:algrain_paris10 OR from:ACORDEBARD OR from:RemiFeraud OR from:PotierDominique) until:2022-03-27 since:2022-01-27"
Collecting tweet: 1744 tweets [01:10, 24.70 tweets/s, queries=1, tokens=1]
Une excellente candidate pour porter les couleurs de la gauche avec conviction aux prochaines législatives. Très heureux d’avoir participé à cette belle soirée rue Championnet pour soutenir @GabrielleSiry et son suppléant @Karim_Ziady dans la 3e circonscription de #Paris. « GabrielleSiry: Lancement de mon comité de soutien pour les élections législatives : merci à @RemiFeraud et @SergeOrru pour leur soutien : leur engagement est une inspiration, à la hauteur des combats que je souhaite mener pour notre circonscription ! https://twitter.com/GabrielleSiry/status/150780163186960

In [4]:
## convert .csv affiliates' tweets in a list
df_tw_AH_aff_all = pd.read_csv("tw_AH_aff_all.csv")
list_tw_AH_aff_all = df_tw_AH_aff_all['text'].values.tolist()
print(list_tw_AH_aff_all[0])
print(len(list_tw_AH_aff_all))

Une excellente candidate pour porter les couleurs de la gauche avec conviction aux prochaines législatives. Très heureux d’avoir participé à cette belle soirée rue Championnet pour soutenir @GabrielleSiry et son suppléant @Karim_Ziady dans la 3e circonscription de #Paris. « GabrielleSiry: Lancement de mon comité de soutien pour les élections législatives : merci à @RemiFeraud et @SergeOrru pour leur soutien : leur engagement est une inspiration, à la hauteur des combats que je souhaite mener pour notre circonscription ! https://twitter.com/GabrielleSiry/status/1507801631869607943/photo/1 — https://twitter.com/gabriellesiry/status/1507801631869607943 »
1744


In [5]:
## merge AH's and AH's affiliates lists
list_tw_AH_all = list_tw_AH + list_tw_AH_aff_all

In [6]:
## i retrieve a string from the pdf of AH's manifesto using extract_text of the pdfminer package
## the cleaning process is specific for this manifesto and it depends on the output of extract_text
manif_AH = extract_text('/Users/simonemariaparazzoli/Documents/Università/Sciences Po/Diving into public digital spaces/research/manifesto_hidalgo.pdf')
manif_clean_AH = manif_AH.replace(' .','')
manif_clean_AH = manif_clean_AH.replace('   ','')
manif_clean_AH = manif_clean_AH.replace('\n\n','')
manif_clean_AH = manif_clean_AH.replace('\n','')
manif_clean_AH = manif_clean_AH.replace('. ','---')
manif_clean_AH = manif_clean_AH.replace(' _ ','---')
manif_clean_AH = manif_clean_AH.replace('\x0c',' ')
#print(repr(manif_clean_AH))

## convert the string of the manifesto into a list
list_manif_AH = manif_clean_AH.split("---")
list_manif_AH = [s for s in list_manif_AH if len(s)>20]
len(list_manif_AH)
#print(list_manif_EM)'''

472

In [18]:
## merge the two lists of tweets and of the manifesto 
list_AH = list_tw_AH_all + list_tw_AH_all + list_manif_AH
len(list_AH)

5224

In [19]:
## load a spacy model to retrieve stop words
nlp = spacy.load("fr_core_news_sm")
stop_words_fr = nlp.Defaults.stop_words
#new_sw = ["avec","la","les","le","pour","un","une","nous","ete","et","je"]
#stop_words_fr.add(new_sw)

## clean the list of tweets and manifesto to get rid of useless words and make the list content homogeneous
list_AH_clean = []
for i in list_AH:
    doc = nlp(i)
    tokens = [unidecode.unidecode(token.text).lower()for token in doc 
              if ( token.text not in stop_words_fr and
                  len(token.text)>1 and
                  token.like_url == False )]
    tokens_joined = ' '.join(tokens)
    list_AH_clean.append(tokens_joined)

In [20]:
## test the output of the cleaning process
print(list_AH[2401])
print("---")
print(list_AH_clean[2401])

Le but réel de la réforme Macron n’est pas de sauver notre retraite, mais de la réduire parce qu’il considère, ce sont ses mots, que la protection sociale coûte « un pognon de dingue » et qu’elle est inefficace.
#HidalgoToulouse
---
le but reel reforme macron sauver retraite reduire considere mots protection sociale coute pognon dingue inefficace hidalgotoulouse


In [21]:
## prepare the corpus as a class
class MyCorpus_AH:

    def __iter__(self):
        for i in list_AH_clean:
            # assume there's one document per line, tokens separated by whitespace
            yield utils.simple_preprocess(i,min_len=3)

In [57]:
## train the word embeddings model_AH
sentences = MyCorpus_AH()
model_AH = gensim.models.Word2Vec(sentences=sentences, min_count=10, vector_size=300, epochs=100)

In [58]:
## transform the corpus list (that is made of tweets and sentences from the manifesto)
## in a list containing all the words of the corpus as elements of the list
words_AH = []

for i in list_AH_clean:
    i_split = i.split(' ') #transform the i document into a list (split at blank space)
    words_AH.extend(i_split)

## clean the list of tokens
words_AH_clean = [x for x in words_AH 
                   if x not in stop_words_fr
                   if x != "\n\n"
                   if len(x)>1]

In [59]:
## find the 30 most common words using Counter
words_freq_AH = Counter(words_AH_clean)
common_words_AH = words_freq_AH.most_common(30)
print(common_words_AH)

[('@anne_hidalgo', 938), ('france', 743), ('hidalgo2022', 679), ('ukraine', 542), ('macron', 456), ('ans', 428), ('gauche', 424), ('contre', 409), ('francais', 395), ('soutien', 340), ('faire', 326), ('@2022avechidalgo', 320), ('projet', 307), ('paris', 305), ('reunir', 274), ('politique', 269), ('femmes', 263), ('peuple', 261), ('guerre', 260), ("aujourd'hui", 245), ('europe', 244), ('face', 244), ('poutine', 244), ('sociale', 240), ('pays', 236), ('faut', 233), ('republique', 230), ('presidentielle', 225), ('vie', 224), ('soir', 214)]


In [60]:
## first attempt with the most_similar function on our corpus using our model_AH
result = model_AH.wv.most_similar(positive=['societe'], topn=30)
print(result)

[('precieux', 0.381132572889328), ('aines', 0.36713463068008423), ('bienveillante', 0.3328389823436737), ('civile', 0.3111730217933655), ('inclusive', 0.29272687435150146), ('republique', 0.2884858250617981), ('cout', 0.2677302360534668), ('changions', 0.26369690895080566), ('vision', 0.2584975063800812), ('vieillir', 0.25551047921180725), ('champ', 0.25428423285484314), ('renforcement', 0.25345155596733093), ('decider', 0.24807973206043243), ('totalement', 0.2474924623966217), ('payes', 0.24624770879745483), ('permettent', 0.2454967498779297), ('offrir', 0.2447982281446457), ('cap', 0.24459949135780334), ('seniors', 0.24281750619411469), ('soient', 0.2417803406715393), ('changeons', 0.2393186092376709), ('commence', 0.23789656162261963), ('outil', 0.23006369173526764), ('agriculture', 0.2299758791923523), ('bloc', 0.2287089228630066), ('reve', 0.2283480167388916), ('civique', 0.2282690554857254), ('redonner', 0.22751165926456451), ('tourner', 0.22581610083580017), ('donnerai', 0.22540

In [61]:
print(len(list_tw_AH_all),',',len(list_AH_clean))

2376 , 5224


In [62]:
result = model_AH.wv.most_similar(positive=['france'], topn=20)
print(result)

[('decidez', 0.34071430563926697), ('rappelez', 0.3145918548107147), ('valeurs', 0.3075127601623535), ('fidelite', 0.3073122799396515), ('parce', 0.2852688729763031), ('montpellier', 0.28386035561561584), ('deciderez', 0.26342594623565674), ('anne_hidalgo', 0.2577950656414032), ('mdelafosse', 0.2573317885398865), ('reunir', 0.2567092478275299), ('gagne', 0.2513056695461273), ('reunirlafrance', 0.2433544397354126), ('jlmelenchon', 0.238833487033844), ('experience', 0.23153309524059296), ('democratie', 0.22372443974018097), ('democrate', 0.22215700149536133), ('gulsenyil', 0.21858248114585876), ('afrique', 0.21781226992607117), ('bernardjomier', 0.21729089319705963), ('fidele', 0.2145492434501648)]


In [63]:
result = model_AH.wv.most_similar(positive=['etat'], topn=20)
print(result)

[('ukrainienne', 0.2812739610671997), ('devenir', 0.2812518775463104), ('pouvoirs', 0.2770009934902191), ('ethique', 0.2624605894088745), ('garantir', 0.2600233256816864), ('urbains', 0.25963282585144043), ('regions', 0.2504827082157135), ('lien', 0.24783118069171906), ('autonomie', 0.24675682187080383), ('niveau', 0.24376583099365234), ('nation', 0.24242480099201202), ('ruraux', 0.23415204882621765), ('handicap', 0.23285698890686035), ('collectivites', 0.2304406315088272), ('chlordecone', 0.23029577732086182), ('heros', 0.22810378670692444), ('issus', 0.22685670852661133), ('territoires', 0.22621211409568787), ('meme', 0.22605392336845398), ('honneur', 0.2207166701555252)]


In [64]:
result = model_AH.wv.most_similar(positive=['souverainete'], topn=20)
print(result)

[('agriculteurs', 0.47293615341186523), ('affirmer', 0.41496503353118896), ('energetique', 0.3421960175037384), ('omelchenkovadym', 0.341889888048172), ('menace', 0.33586040139198303), ('perdu', 0.334547758102417), ('alimentaire', 0.33399030566215515), ('independance', 0.3274781405925751), ('defense', 0.3123176395893097), ('arme', 0.308266282081604), ('modele', 0.3074202835559845), ('economique', 0.3069272041320801), ('metropolitain', 0.30682408809661865), ('chaque', 0.30566930770874023), ('international', 0.29368361830711365), ('issus', 0.289353609085083), ('rester', 0.2850973308086395), ('matiere', 0.2848382592201233), ('chine', 0.2835574448108673), ('commencant', 0.2751500904560089)]


In [65]:
result = model_AH.wv.most_similar(positive=['president'], topn=20)
print(result)

[('volodymyr', 0.38224995136260986), ('emmanuelmacron', 0.37889397144317627), ('presidence', 0.3377213478088379), ('refuser', 0.3224511742591858), ('zelensky', 0.3127437233924866), ('presidents', 0.30689921975135803), ('vice', 0.2935457229614258), ('etre', 0.2934006154537201), ('organise', 0.2911805808544159), ('ministres', 0.2884267270565033), ('exclusion', 0.2780114412307739), ('courageux', 0.2721307575702667), ('representant', 0.26361098885536194), ('debat', 0.2573014199733734), ('affirme', 0.2561261057853699), ('jacques', 0.25586485862731934), ('ecouter', 0.24847306311130524), ('zelenskyyua', 0.2432822734117508), ('proposee', 0.24154874682426453), ('rafle', 0.2369978129863739)]


In [66]:
result = model_AH.wv.most_similar(positive=['politique'], topn=20)
print(result)

[('ralliements', 0.2832810580730438), ('strategie', 0.2407638132572174), ('question', 0.23764203488826752), ('pilier', 0.2367904931306839), ('woerth', 0.22274112701416016), ('prend', 0.22060935199260712), ('communs', 0.21880710124969482), ('convictions', 0.2117546945810318), ('antisemitisme', 0.20693084597587585), ('quotidienne', 0.2067946344614029), ('ethique', 0.20551390945911407), ('manque', 0.20505304634571075), ('baisser', 0.20481714606285095), ('cavousf', 0.2040146440267563), ('politiques', 0.2038559764623642), ('j_jaures', 0.20306405425071716), ('presidente', 0.20253965258598328), ('realiste', 0.20054641366004944), ('incarne', 0.19577239453792572), ('morale', 0.19092325866222382)]


In [67]:
result = model_AH.wv.most_similar(positive=['droit'], topn=20)
print(result)

[('disposer', 0.3706890940666199), ('corps', 0.3094838261604309), ('destin', 0.30009230971336365), ('soient', 0.28827783465385437), ('incarner', 0.2796982526779175), ('directan', 0.2705259919166565), ('projetmacron', 0.25089362263679504), ('claire', 0.23859640955924988), ('disposition', 0.23431214690208435), ('peuples', 0.23226623237133026), ('sujet', 0.22632962465286255), ('empecher', 0.2261267751455307), ('puissent', 0.22403788566589355), ('lfi', 0.22199535369873047), ('faites', 0.21933452785015106), ('europeennes', 0.21626520156860352), ('accompagnement', 0.2160995453596115), ('ivg', 0.2121640145778656), ('societe', 0.20908771455287933), ('revenu', 0.20883135497570038)]


In [68]:
result = model_AH.wv.most_similar(positive=['entreprise'], topn=20)
print(result)

[('comptable', 0.5202212333679199), ('sols', 0.41280192136764526), ('depenses', 0.4071543514728546), ('preuve', 0.3941413462162018), ('page', 0.3856995403766632), ('mds', 0.37629079818725586), ('usagers', 0.3563571274280548), ('carbone', 0.35471275448799133), ('ecarts', 0.345926433801651), ('entreprises', 0.33928415179252625), ('foot', 0.3359219431877136), ('travaux', 0.3301438093185425), ('odyssees', 0.3269922435283661), ('economie', 0.3227898180484772), ('lancer', 0.3202676773071289), ('bloquer', 0.32012686133384705), ('lance', 0.31604135036468506), ('carburants', 0.3136537969112396), ('investissement', 0.31079941987991333), ('die', 0.30482953786849976)]


In [69]:
result = model_AH.wv.most_similar(positive=['economie'], topn=20)
print(result)

[('mobilite', 0.41771724820137024), ('electriques', 0.39899158477783203), ('croissance', 0.3843429386615753), ('odyssees', 0.3760421872138977), ('pilier', 0.373639315366745), ('finance', 0.36747539043426514), ('fonds', 0.3630561828613281), ('renforcement', 0.3542661964893341), ('usagers', 0.34782275557518005), ('protection', 0.34738048911094666), ('secteurs', 0.3444274663925171), ('sols', 0.3443007171154022), ('agricole', 0.3438834249973297), ('carbone', 0.3380207121372223), ('alimentation', 0.33680155873298645), ('economique', 0.3361111581325531), ('eco', 0.33399683237075806), ('renovation', 0.3324301242828369), ('champ', 0.32985439896583557), ('organisations', 0.3251557946205139)]


In [70]:
result = model_AH.wv.most_similar(positive=['emploi'], topn=20)
print(result)

[('developpement', 0.3724491000175476), ('dotation', 0.3557223677635193), ('bonheur', 0.3528076708316803), ('qualite', 0.35114172101020813), ('emplois', 0.3330519199371338), ('alimentation', 0.32615649700164795), ('formation', 0.30837196111679077), ('reussite', 0.3051411211490631), ('enseignement', 0.30452218651771545), ('biodiversite', 0.3024701476097107), ('installation', 0.2955058813095093), ('faciliter', 0.2918851375579834), ('augmente', 0.2877294421195984), ('eau', 0.28384944796562195), ('numerique', 0.28377529978752136), ('quartiers', 0.282855749130249), ('mds', 0.27929621934890747), ('gratuite', 0.2708461582660675), ('grands', 0.2701988220214844), ('nantesmetropole', 0.2700428068637848)]


In [71]:
result = model_AH.wv.most_similar(positive=['travail'], topn=20)
print(result)

[('remuneration', 0.30452030897140503), ('realiste', 0.2501106262207031), ('emploi', 0.24749557673931122), ('chomage', 0.2444220334291458), ('revaloriserai', 0.23987029492855072), ('publics', 0.23631884157657623), ('emplois', 0.2347354292869568), ('promesse', 0.22812102735042572), ('mandat', 0.22741606831550598), ('valeur', 0.2263251692056656), ('public', 0.2232770174741745), ('familles', 0.22090263664722443), ('accessible', 0.21758440136909485), ('salaires', 0.21655145287513733), ('ecarts', 0.21306751668453217), ('fhollande', 0.2108084261417389), ('evidemment', 0.20781250298023224), ('finir', 0.20712019503116608), ('residents', 0.20528724789619446), ('longue', 0.20489373803138733)]


In [72]:
result = model_AH.wv.most_similar(positive=['smic'], topn=20)
print(result)

[('salariales', 0.5740991830825806), ('net', 0.5737103223800659), ('augmenterai', 0.5683633089065552), ('salaires', 0.5678843855857849), ('branches', 0.5621363520622253), ('hausse', 0.5388776063919067), ('revaloriserai', 0.527070164680481), ('augmentation', 0.5226883292198181), ('ouvrirai', 0.5183122158050537), ('salaire', 0.516595184803009), ('ecarts', 0.47715651988983154), ('porterai', 0.45340800285339355), ('augmente', 0.42989227175712585), ('assurance', 0.4287497401237488), ('chomage', 0.4139602482318878), ('laref', 0.3990229368209839), ('revalorisation', 0.3970962166786194), ('indice', 0.3924151062965393), ('cout', 0.38098767399787903), ('bonjourchezvous', 0.3782467842102051)]


In [73]:
result = model_AH.wv.most_similar(positive=['numerique'], topn=20)
print(result)

[('evenement', 0.5454250574111938), ('innovation', 0.5275990962982178), ('mobilite', 0.49291500449180603), ('infranum', 0.4634096622467041), ('secteur', 0.43550387024879456), ('fracture', 0.417537659406662), ('odyssees', 0.411298006772995), ('secteurs', 0.37465527653694153), ('nantesmetropole', 0.3587471544742584), ('musique', 0.34545570611953735), ('investissement', 0.34306302666664124), ('durable', 0.33454790711402893), ('essentiels', 0.32925277948379517), ('recherche', 0.32707276940345764), ('responsable', 0.32430773973464966), ('carbone', 0.32399782538414), ('pilier', 0.317931205034256), ('acces', 0.30832716822624207), ('finance', 0.30758437514305115), ('prive', 0.3057272732257843)]


In [74]:
result = model_AH.wv.most_similar(positive=['donnees'], topn=20)
print(result)

[('gafam', 0.45849278569221497), ('abordable', 0.4440786838531494), ('poser', 0.4355752468109131), ('maitrise', 0.43164941668510437), ('cap', 0.4123902916908264), ('loyers', 0.3607327342033386), ('zones', 0.36041703820228577), ('essence', 0.3409052789211273), ('tva', 0.31939253211021423), ('innovation', 0.3082515299320221), ('cout', 0.305747389793396), ('raison', 0.3024924695491791), ('defense', 0.2963048219680786), ('humains', 0.29442209005355835), ('consensus', 0.29282912611961365), ('encadrement', 0.2920837104320526), ('progres', 0.292079359292984), ('immediatement', 0.29024139046669006), ('construire', 0.2842065095901489), ('essentielle', 0.28398117423057556)]


In [75]:
result = model_AH.wv.most_similar(positive=['monde'], topn=20)
print(result)

[('refaire', 0.28908899426460266), ('union', 0.26282647252082825), ('dangereux', 0.2500469982624054), ('entier', 0.23584188520908356), ('pilier', 0.22605755925178528), ('maniere', 0.21738481521606445), ('exclure', 0.21145999431610107), ('donnees', 0.19533292949199677), ('decisions', 0.19217577576637268), ('pekin', 0.18941394984722137), ('electorale', 0.18890315294265747), ('chaque', 0.18705439567565918), ('oubliez', 0.185744971036911), ('rester', 0.185329407453537), ('positions', 0.18389225006103516), ('par', 0.18233095109462738), ('cop', 0.18103046715259552), ('militaires', 0.18094590306282043), ('interview', 0.18005026876926422), ('refus', 0.17990833520889282)]


In [76]:
result = model_AH.wv.most_similar(positive=['realite'], topn=20)
print(result)

[('simple', 0.4466332793235779), ('faute', 0.34693753719329834), ('credible', 0.3382064402103424), ('papier', 0.33003515005111694), ('audition', 0.31344467401504517), ('devenir', 0.3003777861595154), ('fracture', 0.2994687259197235), ('realiste', 0.29705721139907837), ('paul_denton', 0.295624315738678), ('enseignement', 0.2940289378166199), ('avis', 0.2920251786708832), ('rtlfrance', 0.283662885427475), ('photo', 0.2758488059043884), ('insupportable', 0.27270928025245667), ('triste', 0.27122554183006287), ('franceinter', 0.27064812183380127), ('feminisme', 0.2668645679950714), ('cavousf', 0.2647218704223633), ('transforme', 0.26320335268974304), ('jeancastex', 0.2611727714538574)]


In [77]:
result = model_AH.wv.most_similar(positive=['verite'], topn=20)
print(result)

[('racistes', 0.47735005617141724), ('petain', 0.469911128282547), ('simple', 0.4392736256122589), ('shoah', 0.4361288249492645), ('complexe', 0.4148815870285034), ('lucbroussy', 0.41276323795318604), ('rappelle', 0.40143677592277527), ('vichy', 0.39762577414512634), ('origines', 0.3898366689682007), ('marine', 0.3709781765937805), ('manifestants', 0.36807698011398315), ('tiennent', 0.3482910394668579), ('devrait', 0.34806370735168457), ('propos', 0.343021422624588), ('travaille', 0.33754125237464905), ('sauf', 0.3360966444015503), ('tragique', 0.33379101753234863), ('certains', 0.3322315514087677), ('qualifier', 0.3315531313419342), ('benefices', 0.3311116397380829)]


In [78]:
result = model_AH.wv.most_similar(positive=['societe'], topn=20)
print(result)

[('precieux', 0.381132572889328), ('aines', 0.36713463068008423), ('bienveillante', 0.3328389823436737), ('civile', 0.3111730217933655), ('inclusive', 0.29272687435150146), ('republique', 0.2884858250617981), ('cout', 0.2677302360534668), ('changions', 0.26369690895080566), ('vision', 0.2584975063800812), ('vieillir', 0.25551047921180725), ('champ', 0.25428423285484314), ('renforcement', 0.25345155596733093), ('decider', 0.24807973206043243), ('totalement', 0.2474924623966217), ('payes', 0.24624770879745483), ('permettent', 0.2454967498779297), ('offrir', 0.2447982281446457), ('cap', 0.24459949135780334), ('seniors', 0.24281750619411469), ('soient', 0.2417803406715393)]


In [79]:
result = model_AH.wv.most_similar(positive=['pouvoir'], topn=20)
print(result)

[('possibilite', 0.3051675260066986), ('destin', 0.27359744906425476), ('parlons', 0.255574107170105), ('concretes', 0.25329533219337463), ('dignement', 0.2503538727760315), ('communs', 0.25003063678741455), ('electriques', 0.24914535880088806), ('continue', 0.23456066846847534), ('sujets', 0.23340171575546265), ('achat', 0.22382187843322754), ('democrate', 0.22267189621925354), ('sondages', 0.22228620946407318), ('profondement', 0.22152754664421082), ('determinee', 0.220190167427063), ('attendent', 0.21638739109039307), ('climatique', 0.21633389592170715), ('moyennes', 0.21586249768733978), ('legitime', 0.21335414052009583), ('preoccupation', 0.21249380707740784), ('vite', 0.21172593533992767)]


In [80]:
result = model_AH.wv.most_similar(positive=['avenir'], topn=20)
print(result)

[('marc', 0.30100083351135254), ('battons', 0.29043400287628174), ('changeons', 0.28612348437309265), ('volonte', 0.2845993936061859), ('chemin', 0.28376731276512146), ('parraine', 0.27413538098335266), ('methode', 0.27089330554008484), ('refaire', 0.26701632142066956), ('faisons', 0.26324155926704407), ('hidalgorennes', 0.26245880126953125), ('prosperite', 0.2522628605365753), ('tronc', 0.24550585448741913), ('majeurs', 0.24447092413902283), ('sujets', 0.24382281303405762), ('avancer', 0.2412956804037094), ('socialiste', 0.24051952362060547), ('confiance', 0.2400171011686325), ('determinee', 0.23867982625961304), ('social', 0.23697319626808167), ('choses', 0.23516565561294556)]


In [81]:
result = model_AH.wv.most_similar(positive=['histoire'], topn=20)
print(result)

[('combattants', 0.3071972131729126), ('sahel', 0.30710071325302124), ('morale', 0.29406267404556274), ('forces', 0.2926237881183624), ('antoniocostaps', 0.288343220949173), ('combattre', 0.27941837906837463), ('hiv', 0.27399811148643494), ('assassines', 0.27377334237098694), ('propos', 0.270337849855423), ('graves', 0.26790398359298706), ('tragique', 0.26584938168525696), ('hollande', 0.2599030137062073), ('jlmelenchon', 0.2582453489303589), ('reelue', 0.25683242082595825), ('aime', 0.2560202479362488), ('memoires', 0.256011426448822), ('ralliements', 0.2547282874584198), ('accepte', 0.2543482482433319), ('vouloir', 0.25295189023017883), ('relache', 0.25274956226348877)]


In [82]:
result = model_AH.wv.most_similar(positive=['contre'], topn=20)
print(result)

[('harcelement', 0.36389148235321045), ('violences', 0.3537256419658661), ('racisme', 0.3272739350795746), ('terrorisme', 0.3267793357372284), ('islamiste', 0.32606953382492065), ('antisemitisme', 0.30016788840293884), ('discriminations', 0.2955075800418854), ('faites', 0.2887524962425232), ('blesses', 0.2851293385028839), ('tabou', 0.27875399589538574), ('rechauffement', 0.2741753160953522), ('don', 0.26858580112457275), ('precarite', 0.2669621706008911), ('proposee', 0.2568177878856659), ('poursuivre', 0.2564598321914673), ('commencant', 0.2543312907218933), ('bilan', 0.25276345014572144), ('jeudi', 0.24964889883995056), ('moyen', 0.24943232536315918), ('forces', 0.23353268206119537)]


In [83]:
result = model_AH.wv.most_similar(positive=['faut'], topn=20)
print(result)

[('veut', 0.30849942564964294), ('arreter', 0.30832216143608093), ('assume', 0.30176687240600586), ('fatalite', 0.27309510111808777), ('faudrait', 0.26373976469039917), ('agresse', 0.25921866297721863), ('raison', 0.2569226920604706), ('maniere', 0.25522276759147644), ('immediat', 0.2522323429584503), ('cesse', 0.2506624758243561), ('proteger', 0.24927698075771332), ('reaction', 0.24636998772621155), ('poids', 0.24080751836299896), ('trouver', 0.24065646529197693), ('consequences', 0.2401774525642395), ('vrai', 0.2394070029258728), ('menee', 0.2387489676475525), ('bloquer', 0.2385166436433792), ('pourra', 0.2307165563106537), ('diplomatie', 0.2304472178220749)]


In [84]:
result = model_AH.wv.most_similar(positive=['crise'], topn=20)
print(result)

[('devenir', 0.3928050696849823), ('sanitaire', 0.3513595759868622), ('lemondefr', 0.31834131479263306), ('puissance', 0.29567137360572815), ('train', 0.2899491786956787), ('sujets', 0.2674969434738159), ('responsabilite', 0.26642128825187683), ('contexte', 0.25426575541496277), ('quinquennat', 0.25361523032188416), ('scandale', 0.2518555521965027), ('cooperations', 0.24231913685798645), ('assume', 0.24136504530906677), ('rechauffement', 0.23967333137989044), ('mobilise', 0.23547481000423431), ('provenance', 0.23372286558151245), ('crises', 0.23361945152282715), ('diplomatie', 0.22754372656345367), ('credible', 0.2270706444978714), ('encadrement', 0.2259240597486496), ('mondiale', 0.2239297479391098)]
