In [31]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [32]:
import dataiku
from dataiku import pandasutils as pdu

# classic
import time

# data structure
import pandas as pd
import numpy as np

# nlp packages
import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

# custom libs
import karmahutils as kut


# ML
import tensorflow as tf
assert tf.__version__ >= "2.0"
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from transformers import pipeline


# widgets

import ipywidgets as widgets
from IPython.display import display

In [33]:
tokenizer = AutoTokenizer.from_pretrained("tblard/tf-allocine", use_fast=True)
model = TFAutoModelForSequenceClassification.from_pretrained("tblard/tf-allocine")
nlp = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

All model checkpoint layers were used when initializing TFCamembertForSequenceClassification.

All the layers of TFCamembertForSequenceClassification were initialized from the model checkpoint at tblard/tf-allocine.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFCamembertForSequenceClassification for predictions without further training.


In [34]:
class Color:   
   GREEN = '\033[92m'
   RED = '\033[91m'
   BOLD = '\033[1m'   
   END = '\033[0m'

button = widgets.Button(
    description='CLASSIFY !',
    button_style='success'
  )

text_area = widgets.Textarea(
    value='',
    placeholder='Type something',
    description='',
    disabled=False
)
output = widgets.Output()

def on_button_clicked(b):
    text = text_area.value
    result = nlp(text)
    prediction = result[0]["label"]
    if prediction == "POSITIVE":   
        color = Color.GREEN    
    else:
        color = Color.RED

    with output:    
        print(Color.BOLD + color + f'{prediction}: ' + Color.END + f'"{text[:50]}"')

button.on_click(on_button_clicked)
display(text_area, button, output)

Textarea(value='', placeholder='Type something')

Button(button_style='success', description='CLASSIFY !', style=ButtonStyle())

Output()

### cleaning laure
```python


def clean (text):
    tokenized = word_tokenize(text) # Tokenize
    words_only = [word for word in tokenized if word.isalpha()] # Remove numbers
    stop_words = set(stopwords.words('french')) # Make stopword list
    without_stopwords = [word for word in words_only if not word in stop_words] # Remove Stop Words
    lemma=WordNetLemmatizer() # Initiate Lemmatizer
    lemmatized = [lemma.lemmatize(word) for word in without_stopwords] # Lemmatize
    return lemmatized

# Apply to all texts
df['tweet'] = df['tweet'].apply(clean)
# La fonction clean renvoyant une liste, je transforme cette liste en string
df['tweet'] = df['tweet'].apply(lambda x: ' '.join(map(str, x)))
```

In [35]:
def load_data(first_extract='mediaVaccination', second_extract='dataVaccination5G', format='dss'):
    media_df=kut.load_dataset(first_extract) if not format=="csv" else pd.read_csv(first_extract)
    media_df=media_df.drop('id',axis=1).set_index('status_id')
    print('media df shape',media_df.shape)
    data_df=kut.load_dataset(second_extract) if not format=="csv" else pd.read_csv(second_extract)
    data_df.set_index('status_id',inplace=True)
    print('data df shape',data_df.shape)
    original_size=len(data_df)
    print('joining')
    data_df=data_df.join(media_df).reset_index()
    print(len(data_df),len(data_df)==original_size)
    print(data_df.shape)
    return data_df
data_df=load_data()
data_df.sample()

2021-06-09 14:54:01.698713 loading mediaVaccination
df mediaVaccination loaded: 65882 in 0:00:00.640220
media df shape (65882, 11)
2021-06-09 14:54:02.358922 loading dataVaccination5G
df dataVaccination5G loaded: 193209 in 0:00:04.941109
data df shape (193209, 31)
joining
193209 True
(193209, 43)


Unnamed: 0,status_id,id,pseudo,tweet,tweet_date,tweet_url,tweet_hashtags,tweet_urls,tweet_mentions,tweet_formatted,is_rt,retweeted_status_id,retweeted_screen_name,place_url,place_name,place_full_name,place_type,country,country_code,geo_coords,coords_coords,bbox_coords,name,location,description,followers_count,friends_count,listed_count,statuses_count,favourites_count,account_created_at,profile_expanded_url,urls_url,urls_t.co,urls_expanded_url,media_url,media_t.co,media_expanded_url,media_type,ext_media_url,ext_media_t.co,ext_media_expanded_url,ext_media_type
61379,1245795721229389824,61380,fatihabadaoui2,RT @collCartonJaune Vu le rejet idéologique de...,2020-04-02 19:30:20.0,http://twitter.com/fatihabadaoui2/statuses/124...,chloroquine|COVID,,collCartonJaune,rt @collcartonjaune vu le rejet ideologique de...,True,1.24564e+18,collCartonJaune,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,badaoui (RN#MLP_officiel),,,1360.0,4315.0,3.0,13729.0,6782.0,2015-12-26 17:35:47,,,,,,,,,,,,


In [37]:
def clean_tweets(data,tweet_column='tweet',method='laure',clean_column=None):
    kut.display_message('cleaning tweet column','name: '+tweet_column)
    if clean_column is None :
        clean_column=tweet_column
    print('output',clean_column)
    start=kut.yet()
    def clean (text):
        tokenized = word_tokenize(text) # Tokenize
        words_only = [word for word in tokenized if word.isalpha()] # Remove numbers
        stop_words = set(stopwords.words('french')) # Make stopword list
        without_stopwords = [word for word in words_only if not word in stop_words] # Remove Stop Words
        lemma=WordNetLemmatizer() # Initiate Lemmatizer
        lemmatized = [lemma.lemmatize(word) for word in without_stopwords] # Lemmatize
        return ' '.join([str(X) for X in lemmatized])

    # Apply to all texts
    data[clean_column] = data[tweet_column].apply(clean)
    kut.job_done(start=start)
    return data
data_df=clean_tweets(data= data_df,clean_column='lemmatizedTweet')
data_df.sample()

*************************
* cleaning tweet column *
* name: tweet           *
*************************
output lemmatizedTweet
job done in 0:02:18.364897 


Unnamed: 0,status_id,id,pseudo,tweet,tweet_date,tweet_url,tweet_hashtags,tweet_urls,tweet_mentions,tweet_formatted,is_rt,retweeted_status_id,retweeted_screen_name,place_url,place_name,place_full_name,place_type,country,country_code,geo_coords,coords_coords,bbox_coords,name,location,description,followers_count,friends_count,listed_count,statuses_count,favourites_count,account_created_at,profile_expanded_url,urls_url,urls_t.co,urls_expanded_url,media_url,media_t.co,media_expanded_url,media_type,ext_media_url,ext_media_t.co,ext_media_expanded_url,ext_media_type,lemmatizedTweet
19924,1329402193502478336,19925,barbarafleuret,@AdeSouzy @thomas_pennec Parce que ce vaccin n...,2020-11-19 12:32:36.0,http://twitter.com/barbarafleuret/statuses/132...,,,AdeSouzy,@adesouzy @thomas_pennec parce que ce vaccin n...,False,,,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,barbara<U+0001F54A>libre-penseuse,france,<U+0001F6AB>je bloque tous les trolls : insult...,356.0,339.0,1.0,4896.0,11368.0,2010-02-20 10:56:35,,,,,,,,,,,,,AdeSouzy Parce vaccin inocule virus atténué co...


In [22]:
initial_weights = model.get_weights()
model.summary()

Model: "tf_camembert_for_sequence_classification_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
roberta (TFRobertaMainLayer) multiple                  110031360 
_________________________________________________________________
classifier (TFRobertaClassif multiple                  592130    
Total params: 110,623,490
Trainable params: 110,623,490
Non-trainable params: 0
_________________________________________________________________


In [65]:
def predict_sentiments(text):
    result = nlp(text)[0]
    return pd.Series({'sentiment_label':result['label'],'sentiment_score':result['score']} )
test_entry=data_df.sample().iloc[0]
test={'lemme' :test_entry.lemmatizedTweet , 'text': test_entry.tweet}
print(test['text'])
print('--')
print(test['lemme'])
predict_sentiments(test['lemme'])

@HugoMacaire Y’a des puces dans le vaccin qui vont permettre de nous pister mec t’es pas du tout au courant...
--
HugoMacaire Y a puce vaccin vont permettre pister mec tout courant


sentiment_label    NEGATIVE
sentiment_score    0.685215
dtype: object

In [73]:
nlp(['this is a text','this is another'])

[{'label': 'NEGATIVE', 'score': 0.5376941561698914},
 {'label': 'NEGATIVE', 'score': 0.5078697204589844}]

In [81]:
start=kut.yet()
test_data=data_df.sample(10)
result=nlp(test_data.lemmatizedTweet.tolist())
kut.job_done(start=start)
pd.DataFrame(result)

job done in 0:00:01.394868 


Unnamed: 0,label,score
0,POSITIVE,0.784953
1,POSITIVE,0.553782
2,NEGATIVE,0.733865
3,NEGATIVE,0.690407
4,POSITIVE,0.724882
5,POSITIVE,0.733601
6,POSITIVE,0.504995
7,POSITIVE,0.562934
8,NEGATIVE,0.639648
9,NEGATIVE,0.657389


In [97]:
def split_me(data,batch_size=2):
    return [df for g,df in data.groupby(np.arange(len(data))//batch_size)]
test_data=data_df.sample(10).reset_index()
split_me(test_data)[-1]

Unnamed: 0,index,status_id,id,pseudo,tweet,tweet_date,tweet_url,tweet_hashtags,tweet_urls,tweet_mentions,tweet_formatted,is_rt,retweeted_status_id,retweeted_screen_name,place_url,place_name,place_full_name,place_type,country,country_code,geo_coords,coords_coords,bbox_coords,name,location,description,followers_count,friends_count,listed_count,statuses_count,favourites_count,account_created_at,profile_expanded_url,urls_url,urls_t.co,urls_expanded_url,media_url,media_t.co,media_expanded_url,media_type,ext_media_url,ext_media_t.co,ext_media_expanded_url,ext_media_type,lemmatizedTweet
8,177254,1293283544261890048,177255,ThiasHDR,RT @jaquieja @Geo75Geo SUIVEZ LES LANCEURS D'A...,2020-08-11 20:29:59.0,http://twitter.com/ThiasHDR/statuses/129328354...,,https://t.co/3HgBVlK7Cv,jaquieja,rt @jaquieja @geo75geo suivez les lanceurs d a...,True,1.293278e+18,jaquieja,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Thias,France,"Family, sport & nature <U+0001F6B5><U+0001F3FB...",25.0,149.0,0.0,1032.0,1809.0,2017-10-07 06:17:56,,,,,,,,,,,,,RT jaquieja SUIVEZ LES LANCEURS SURTOUT QUÈBÈC...
9,49740,1288370707114741760,49741,DLFParis16,RT @AldoSterone111 Bill Gates evasif quand CBS...,2020-07-29 07:08:08.0,http://twitter.com/DLFParis16/statuses/1288370...,,,AldoSterone111,rt @aldosterone111 bill gates evasif quand cbs...,True,1.288076e+18,AldoSterone111,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,DLF Paris 16,"Paris, France",#LaPatrieAvantLesPartis <U+0001F499> <U+0001F1...,1992.0,2938.0,25.0,106593.0,174969.0,2009-10-15 20:16:34,http://www.debout-la-france.fr,,,,,,,,,,,,RT Bill Gates evasif quand CBSN pose question ...


In [123]:
kut.file_stamp()

'2021-06-11_1114'

In [146]:
def load_backup(backup_dir,task_name):
    if backup_dir is None :
        return []
    print('loading intermediate backups')
    files=listdir(backup_dir)
    candidat_files =[X for X in files if task_name in X]
    if len(candidat_files) == 0 :
        print('found no backups for task',taskname)
        return []
    batch_tags=list(set([''.join(X.split('_')[-3:-1]) for X in candidat_files]))
    print('found',len(batch_tags),'jobs')
    print(batch_tags)
    print('selected',max(batch_tags))
    
    return []

In [147]:
from os import listdir
def batch_me(data,instructions,batch_size=100,track_offset=None,with_save_dir=None,with_load_dir=None,task_name='batchedJob'):
    from math import ceil,floor
    
    batch_size=len(data) if batch_size > len(data) else batch_size
    total_batches=ceil(len(data)/batch_size)
    if track_offset is None :
        track_offset = round(0.25*total_batches)
    
    current_batch=0


    kut.display_message('batching',instructions)
    print(floor(len(data)/batch_size),'batches of',batch_size,'rows')
    
    if len(data)%batch_size:
        print('one batch of',len(data)%batch_size,'rows' )
    if track_offset :
        print('track offset every',track_offset,'batch')
    if with_load_dir is True :
            with_load_dir = with_save_dir
    if with_save_dir :
        save_radical=with_save_dir+'_'.join([task_name,kut.file_stamp()])
        print('batch will be saved in:',save_radical+'_*')
    
    if type(instructions) is not list :
        instructions=[instructions]
    #instructions=[X.replace(data,'batch_df') for X in instructions]
    
    
    
    
    treated_stack=load_backup(backup_dir=with_load_dir,task_name=task_name)
    remaining_data=data
    
       
    
    for batch_df in split_me(data=remaining_data,batch_size=batch_size):
        current_batch +=1
        
        for instruction in instructions :
            eval(instruction)
        
        treated_stack.append(batch_df)
        
        
        # intermediate prints and saves
        if not current_batch % track_offset:
            print('done batch',current_batch)
            if with_save_dir:
                save_name='_'.join([save_radical,str(current_batch)])
                current_treated_df=pd.concat(treated_stack,ignore_index=True)  
                current_treated_df.to_csv(save_name)
                print('saved',len(current_treated_df),'rows')
                print(save_name)
        
        
        
    out=pd.concat(treated_stack,ignore_index=True)   
    
    return out
test_data=data_df.sample(30).reset_index()
save_dir='/home/yann/_run-backups/'
instructions=['print(len(batch_df))','print("done")']
batch_me(data=test_data,batch_size=3,instructions=instructions,with_save_dir=save_dir,with_load_dir=True)

**************************************
* batching                           *
* print(len(batch_df)) print("done") *
**************************************
10 batches of 3 rows
track offset every 2 batch
batch will be saved in: /home/yann/_run-backups/batchedJob_2021-06-11_1157_*
loading intermediate backups
found 8 jobs
['2021-06-111134', '2021-06-111121', '2021-06-111136', '2021-06-111133', '2021-06-111123', '2021-06-111130', '2021-06-111119', '2021-06-111128']
selected 2021-06-111136
3
done
3
done
done batch 2
saved 6 rows
/home/yann/_run-backups/batchedJob_2021-06-11_1157_2
3
done
3
done
done batch 4
saved 12 rows
/home/yann/_run-backups/batchedJob_2021-06-11_1157_4
3
done
3
done
done batch 6
saved 18 rows
/home/yann/_run-backups/batchedJob_2021-06-11_1157_6
3
done
3
done
done batch 8
saved 24 rows
/home/yann/_run-backups/batchedJob_2021-06-11_1157_8
3
done
3
done
done batch 10
saved 30 rows
/home/yann/_run-backups/batchedJob_2021-06-11_1157_10


Unnamed: 0,index,status_id,id,pseudo,tweet,tweet_date,tweet_url,tweet_hashtags,tweet_urls,tweet_mentions,tweet_formatted,is_rt,retweeted_status_id,retweeted_screen_name,place_url,place_name,place_full_name,place_type,country,country_code,geo_coords,coords_coords,bbox_coords,name,location,description,followers_count,friends_count,listed_count,statuses_count,favourites_count,account_created_at,profile_expanded_url,urls_url,urls_t.co,urls_expanded_url,media_url,media_t.co,media_expanded_url,media_type,ext_media_url,ext_media_t.co,ext_media_expanded_url,ext_media_type,lemmatizedTweet
0,19638,1335593632863227909,19639,ballgrot69,"RT @Nini_MacBright Le vaccin de Pfizer, il est...",2020-12-06 14:35:10.0,http://twitter.com/ballgrot69/statuses/1335593...,,,Nini_MacBright,"rt @nini_macbright le vaccin de pfizer, il est...",True,1.334933e+18,Nini_MacBright,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Ballgrot69,"Lyon, France",,9.0,111.0,0.0,6919.0,19144.0,2016-09-09 07:06:16,,,,,,,,,,,,,RT Le vaccin Pfizer base ARN messager Ça veut ...
1,124483,1347525279644848131,124484,Michel78118339,RT @momotchiii La femme de l'obstétricien de 5...,2021-01-08 12:47:17.0,http://twitter.com/Michel78118339/statuses/134...,Pfizer|manifestation|vaccin|obligatoire|NoForc...,https://t.co/pwA04xFF3Z,momotchiii,rt @momotchiii la femme de l obstetricien de 5...,True,1.347308e+18,momotchiii,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Michel <U+0001F1FA><U+0001F1F8> <U+2721><U+FE0...,Switzerland<U+2666><U+FE0F>NY<U+2666><U+FE0F>B...,2020 will be a terrible year <U+0001F648> I fo...,8338.0,8898.0,4.0,78262.0,58235.0,2019-05-12 21:20:13,,,,,,,,,,,,,RT momotchiii La femme an décédé suite vaccin ...
2,188284,1228799794287718402,188285,When_P,RT @bct_julie S’il vous plaît aidez moi à la r...,2020-02-15 21:54:35.0,http://twitter.com/When_P/statuses/12287997942...,,,bct_julie,rt @bct_julie s il vous plait aidez moi a la r...,True,1.228627e+18,bct_julie,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Wenn,,Je pense pas que ça intéresse grand monde.,7.0,276.0,0.0,3013.0,5059.0,2013-08-06 20:34:29,,,,,,,,,,,,,RT S plaît aidez retrouvé tiens beaucoup sais ...
3,114330,1332459451010469888,114331,mado1406,RT @ADv_in21 Que fait la Justice de NOTRE pays...,2020-11-27 23:01:03.0,http://twitter.com/mado1406/statuses/133245945...,,https://t.co/IGOF14G7BG,"ADv_in21| DIVIZIO1,| OSTERElizabeth1",rt @adv_in21 que fait la justice de notre pays...,True,1.332066e+18,ADv_in21,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,ISA,,"Mensonges permanents, cupidité, corruption, pe...",608.0,943.0,0.0,19281.0,32420.0,2020-01-01 14:09:20,,,,,,,,,,,,,RT Que fait Justice NOTRE pay QUI va être mesu...
4,156831,1334865006056370176,156832,reinfocitoyen,RT @Zyrs10 <U+0001F1FA><U+0001F1F8>RETWEET! Tr...,2020-12-04 14:19:52.0,http://twitter.com/reinfocitoyen/statuses/1334...,,https://t.co/i1M5FwS5dg,Zyrs10,rt @zyrs10 retweet! trump envisageait une comm...,True,1.334677e+18,Zyrs10,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Reinfocitoyen,,La presse a un pouvoir de l'image si puissant ...,566.0,1080.0,3.0,2419.0,3348.0,2020-11-12 23:12:42,,,,,,,,,,,,,RT RETWEET Trump envisageait commission sécuri...
5,50593,1287318706406776832,50594,dominicvasseur,RT @Planetes360 La planète Gates qui mène de l...,2020-07-26 09:27:51.0,http://twitter.com/dominicvasseur/statuses/128...,,https://t.co/yry19pzpKX,Planetes360,rt @planetes360 la planete gates qui mene de l...,True,1.287314e+18,Planetes360,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,DV,Marseille /France,,538.0,462.0,67.0,112539.0,130573.0,2013-01-15 17:23:28,,,,,,,,,,,,,RT La planète Gates mène santé identification ...
6,101103,1354664746985922561,101104,languillem,RT @NicolasPichot6 <U+0001F534>Bill Gates: Les...,2021-01-28 05:36:58.0,http://twitter.com/languillem/statuses/1354664...,,https://t.co/MHqgW0Ta0k,NicolasPichot6| ADv_in21,rt @nicolaspichot6 bill gates: les passeports ...,True,1.354577e+18,NicolasPichot6,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,ludimi Matr 111,france,,6733.0,6262.0,1188.0,1194254.0,479942.0,2012-10-25 17:38:56,,,,,,,,,,,,,RT Bill Gates Les passeports vaccination COVID...
7,180069,1245658451184717824,180070,ToonyBang,"RT @LePetitGuenon Bill Gates demande un ""certi...",2020-04-02 10:24:52.0,http://twitter.com/ToonyBang/statuses/12456584...,NOM,https://t.co/nlbst4XPZt,LePetitGuenon,"rt @lepetitguenon bill gates demande un ""certi...",True,1.245503e+18,LePetitGuenon,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,ynooT <U+0001F5FA><U+FE0F>,,"Notre patience touche à sa fin, votre bail aussi.",204.0,789.0,3.0,11671.0,15869.0,2012-10-27 12:01:52,,,,,,,,,,,,,RT LePetitGuenon Bill Gates demande certificat...
8,120527,1339157414109736960,120528,Math_omane,@AgnurWild Oui le vaccin rend allergique et t'...,2020-12-16 10:36:22.0,http://twitter.com/Math_omane/statuses/1339157...,,,AgnurWild,@agnurwild oui le vaccin rend allergique et t ...,False,,,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,"Daddy Math0,1%",Dans Ta Classe,Prof de points G et de plans Q. J'ai le poil s...,3276.0,107.0,44.0,36689.0,8358.0,2014-01-12 11:20:03,,,,,,,,,,,,,AgnurWild Oui vaccin rend allergique puce rept...
9,139383,1239438574535278593,139384,OlivierFraudeau,RT @RaderSerge Plus on tripote les virus dans ...,2020-03-16 06:29:18.0,http://twitter.com/OlivierFraudeau/statuses/12...,,,RaderSerge,rt @raderserge plus on tripote les virus dans ...,True,1.239279e+18,RaderSerge,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Olivier Fraudeau,France,"Anti géo-ingénierie, anti Haarp, anti théories...",1539.0,4819.0,31.0,16661.0,5801.0,2015-04-03 16:37:54,,,,,,,,,,,,,RT RaderSerge Plus tripote virus centre aussi ...


In [90]:
def add_sentiment_information(data,lemme_column='lemmatizedTweet'):
    kut.display_message('adding sentiment information based on '+ lemme_column)
    print('expected time is 300s/1000rows')
    start=kut.yet()
    lemme_array=data[lemme_column].tolist()
    nlp_df=pd.DataFrame(nlp(lemme_array)).rename(columns={'label':'sentiment_label','score':'sentiment_score'})
    out=data.join(pd.DataFrame(nlp(lemme_array)))
    kut.job_done(start=start)
    return out
test_data=data_df.sample(10).reset_index()
add_sentiment_information(test_data)

*********************************************************
* adding sentiment information based on lemmatizedTweet *
*********************************************************
expected time is 350s/1000rows
job done in 0:04:49.764419 


Unnamed: 0,index,status_id,id,pseudo,tweet,tweet_date,tweet_url,tweet_hashtags,tweet_urls,tweet_mentions,tweet_formatted,is_rt,retweeted_status_id,retweeted_screen_name,place_url,place_name,place_full_name,place_type,country,country_code,geo_coords,coords_coords,bbox_coords,name,location,description,followers_count,friends_count,listed_count,statuses_count,favourites_count,account_created_at,profile_expanded_url,urls_url,urls_t.co,urls_expanded_url,media_url,media_t.co,media_expanded_url,media_type,ext_media_url,ext_media_t.co,ext_media_expanded_url,ext_media_type,lemmatizedTweet,label,score
0,106856,1276236624721035264,106857,LevyLoiseau,RT @RaderSerge Ce vaccin à ARN est une utopie ...,2020-06-25 19:31:37.0,http://twitter.com/LevyLoiseau/statuses/127623...,,https://t.co/jjZFoTxzol,RaderSerge,rt @raderserge ce vaccin a arn est une utopie ...,True,1.276231e+18,RaderSerge,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,@Kliment Levi,"Mont-Tremblant, Québec",Pitchipoï,1728.0,3151.0,24.0,276595.0,170113.0,2011-10-27 06:54:40,,,,,,,,,,,,,RT RaderSerge Ce vaccin ARN utopie idiote Par ...,NEGATIVE,0.888195
1,51378,1284347863204601856,51379,DoyonFrancis1,RT @cherielle100 Vaccine Choice Canada poursui...,2020-07-18 04:42:47.0,http://twitter.com/DoyonFrancis1/statuses/1284...,RDI|polqc,https://t.co/qD8pLlQI8P|https://t.co/Lb09ncqxXX,cherielle100| francoislegault,rt @cherielle100 vaccine choice canada poursui...,True,1.284348e+18,cherielle100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,RT Vaccine Choice Canada poursuit VCC affirme ...,NEGATIVE,0.503438
2,191238,1247421468964782081,191239,ypuget31,RT @EDeymard Covid-19 : La Fondation Gates va ...,2020-04-07 07:10:28.0,http://twitter.com/ypuget31/statuses/124742146...,,https://t.co/QrSJuuciLF,EDeymard| LUsineDigitale,rt @edeymard covid 19 : la fondation gates va ...,True,1.247421e+18,EDeymard,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Yannick Puget,Toulouse,Directeur Services & Innovation - iMSA #dsi #p...,634.0,622.0,143.0,8023.0,10525.0,2011-08-24 16:08:45,,,,,,,,,,,,,RT EDeymard La Fondation Gates va financer usi...,NEGATIVE,0.500366
3,134602,1261236632088391680,134603,newsnetfr,"[Leonid Savin] Au fou ! ... Bill Gates, Vaccin...",2020-05-15 10:07:00.0,http://twitter.com/newsnetfr/statuses/12612366...,,https://t.co/aW5zG1GzLo,,"[leonid savin] au fou ! ... bill gates, vaccin...",False,,,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,newsnet,,Avenir du monde,62.0,0.0,0.0,11736.0,0.0,2019-04-20 18:17:05,http://newsnet.fr,newsnet.fr/173921,https://t.co/aW5zG1GzLo,http://newsnet.fr/173921,,,,,,,,,Leonid Savin Au fou Bill Gates Vaccinations pu...,POSITIVE,0.534847
4,8838,1326890310186758144,8839,alixpv1,RT @us_ain j’espère que la puce que le makhzen...,2020-11-12 14:11:17.0,http://twitter.com/alixpv1/statuses/1326890310...,,,us_ain,rt @us_ain j espere que la puce que le makhzen...,True,1.326856e+18,us_ain,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Alix,,,161.0,242.0,1.0,1904.0,1743.0,2013-05-16 14:58:42,,,,,,,,,,,,,RT espère puce makhzen va injecter mime temp v...,NEGATIVE,0.803570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,173565,1314964094521352192,173566,Swiss_Pepita,RT @heidi_news Quel est le rapport entre les r...,2020-10-10 16:20:45.0,http://twitter.com/Swiss_Pepita/statuses/13149...,,https://t.co/Fjg5Ndend9,heidi_news,rt @heidi_news quel est le rapport entre les r...,True,1.314960e+18,heidi_news,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Christiane <U+0001F64B> & Lulu the Cat <U+0001...,"Rhône-Alpes, France",Le plus souvent on cherche son bonheur comme o...,1969.0,2621.0,185.0,303582.0,226712.0,2012-06-12 15:52:36,,,,,,,,,,,,,RT Quel rapport entre réseaux pédophiles Covid...,POSITIVE,0.571888
996,43277,1365474226627633153,43278,DagostinoJordi,@TichotB @ADv_in21 @juniorbequille Mot clés po...,2021-02-27 01:29:59.0,http://twitter.com/DagostinoJordi/statuses/136...,,,TichotB| juniorbequille,@tichotb @adv_in21 @juniorbequille mot cles po...,False,,,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Robespierre le nettoyeur,France,Grand reset ou Grand nettoyage\n\nLE CHOIX EST...,82.0,361.0,0.0,2208.0,1301.0,2020-12-29 23:11:26,,,,,,,,,,,,,TichotB juniorbequille Mot clés comprendre ID ...,POSITIVE,0.761010
997,45855,1260541792677699584,45856,Debeaulieu8,RT @RaderSerge Magistrale intervention de Robe...,2020-05-13 12:05:57.0,http://twitter.com/Debeaulieu8/statuses/126054...,,https://t.co/TVzZmksu6d,RaderSerge,rt @raderserge magistrale intervention de robe...,True,1.259567e+18,RaderSerge,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Debeaulieu.nour,,"Celui qui se bat peut perdre, celui qui se bat...",1728.0,2804.0,3.0,75955.0,86972.0,2017-03-13 19:58:29,,,,,,,,,,,,,RT RaderSerge Magistrale intervention Robert K...,POSITIVE,0.771645
998,67488,1258372334269935616,67489,fredmontaigu85,@CNEWS S'il vous plait @CNEWS @BFMTV @TF1LeJT ...,2020-05-07 12:25:18.0,http://twitter.com/fredmontaigu85/statuses/125...,JulietteBinoche|BillGates|RFID.,,CNEWS| CNEWS| TF1LeJT| m6info,@cnews s il vous plait @cnews @bfmtv @tf1lejt ...,False,,,,,,,,,NA NA,NA NA,NA NA NA NA NA NA NA NA,Frédéric FILLATRE,Vendée,#Patriote #Résistant et #réfractaire ! Villier...,630.0,855.0,11.0,8188.0,680.0,2010-12-29 22:16:46,,,,,,,,,,,,,CNEWS plait CNEWS BFMTV Francetele LCI autant ...,POSITIVE,0.728939
