In [1]:
import pandas as pd
# Cap the displayed width of each column at 50 characters.
pd.set_option('display.max_colwidth', 50)

# Load the pickled articles DataFrame.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
data = pd.read_pickle('pickle/articles.pkl')
# Summarise the index, columns, dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 91 entries, 0 to 91
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        91 non-null     object
 1   description  91 non-null     object
dtypes: object(2)
memory usage: 2.1+ KB


In [2]:
import flair
from flair.data import Sentence
# Load the 'en-sentiment' text classifier once; sentiment_per_cat() reads it as a global.
sentiment_model = flair.models.TextClassifier.load('en-sentiment')


2020-12-17 08:53:13,693 loading file /Users/alexandre/.flair/models/sentiment-en-mix-distillbert_3.1.pt


In [3]:
def sentiment_per_cat(df, cat):
    """Label every text in column ``cat`` of ``df`` with the sentiment model.

    Each value of ``df[cat]`` is wrapped in a ``Sentence`` and passed to the
    notebook-level ``sentiment_model``; the first label attached to the
    sentence supplies both the sentiment value and its score.

    Args:
        df: DataFrame holding the texts. It is modified in place: the columns
            ``"sentiment_" + cat`` and ``"confidence_" + cat`` are added (or
            overwritten if they already exist).
        cat: Name of the text column to analyse.

    Returns:
        The same ``df`` object, so the call can be displayed or chained.
    """
    sentiment = []
    confidence = []
    # Read the texts from the frame that was passed in, not from the
    # notebook-level ``data`` variable, so the function works on any frame.
    for text in df[cat]:
        sent = Sentence(text)
        sentiment_model.predict(sent)
        first_label = sent.labels[0]
        sentiment.append(first_label.value)
        confidence.append(first_label.score)
    df["sentiment_" + cat] = sentiment
    df["confidence_" + cat] = confidence

    return df

In [4]:
# Re-check the frame's columns and non-null counts before the sentiment columns are added.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 91 entries, 0 to 91
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        91 non-null     object
 1   description  91 non-null     object
dtypes: object(2)
memory usage: 2.1+ KB


In [5]:
# Title analysis: adds the sentiment_title / confidence_title columns to data.
sentiment_per_cat(data, "title")
# Description analysis: adds the sentiment_description / confidence_description columns.
# The frame returned by this last call is what the cell displays.
sentiment_per_cat(data, "description")

Unnamed: 0,title,description,sentiment_title,confidence_title,sentiment_description,confidence_description
0,with investors expecting a latin american cryp...,six years after the launch of the mexicobased ...,NEGATIVE,0.992876,NEGATIVE,0.990348
1,coinbases backstory and future with kings of c...,i used coinbase as a vehicle because a lot of ...,POSITIVE,0.993253,POSITIVE,0.998374
2,learn the basics of trading cryptocurrency for 30,tldr learn about cryptocurrency before you inv...,NEGATIVE,0.726090,POSITIVE,0.980591
3,criminals getting smarter in use of digital cu...,criminals are becoming more sophisticated in t...,NEGATIVE,0.976688,NEGATIVE,0.604418
4,central banks split on role of private sector ...,central banks are split over whether to collab...,NEGATIVE,0.945959,NEGATIVE,0.940025
...,...,...,...,...,...,...
87,blockchain bites bitcoin shorts in microstrate...,bitcoins options market has flipped bearish fo...,NEGATIVE,0.999585,NEGATIVE,0.999258
88,okcoin agrees to list blockstacks stx token in...,okcoin announced it will become the first usba...,NEGATIVE,0.666741,POSITIVE,0.897304
89,standard chartered northern trust to launch cr...,standard chartereds fintech investment unit sc...,POSITIVE,0.982585,POSITIVE,0.908878
90,76m ether fund makes world first ipo on canadi...,3iq said its ether fund has completed an initi...,POSITIVE,0.721812,POSITIVE,0.981771


In [7]:
# Minimum confidence a prediction needs for its article to be kept.
MIN_TITLE_CONFIDENCE = 0.9
MIN_DESCRIPTION_CONFIDENCE = 0.8

# Flag articles whose title or description prediction is below its threshold.
low_confidence_title = data["confidence_title"] < MIN_TITLE_CONFIDENCE
low_confidence_description = data["confidence_description"] < MIN_DESCRIPTION_CONFIDENCE

# Flag articles whose title and description received opposite sentiments.
conflicting_sentiment = data["sentiment_title"] != data["sentiment_description"]

# Filter with a boolean mask instead of drop(<index labels>): dropping by label
# would also remove non-flagged rows whenever the index contains duplicates.
rejected = low_confidence_title | low_confidence_description | conflicting_sentiment
data = data[~rejected].reset_index(drop=True)

In [8]:
# Display the articles that remain after the confidence and sentiment filters.
data

Unnamed: 0,title,description,sentiment_title,confidence_title,sentiment_description,confidence_description
0,with investors expecting a latin american cryp...,six years after the launch of the mexicobased ...,NEGATIVE,0.992876,NEGATIVE,0.990348
1,coinbases backstory and future with kings of c...,i used coinbase as a vehicle because a lot of ...,POSITIVE,0.993253,POSITIVE,0.998374
2,central banks split on role of private sector ...,central banks are split over whether to collab...,NEGATIVE,0.945959,NEGATIVE,0.940025
3,pound drops as brexit impasse unsettled after ...,the british pound lost about 05 on thursday af...,NEGATIVE,0.982288,NEGATIVE,0.998717
4,column getting mighty crowded searching for m...,the biggest fear in markets is often of market...,NEGATIVE,0.99922,NEGATIVE,0.959164
5,column getting mighty crowded searching for m...,column getting mighty crowded searching for m...,NEGATIVE,0.999172,NEGATIVE,0.999172
6,bitcoins intrinsic value will rise in the comi...,summary list placementullijpmorgan strategists...,NEGATIVE,0.996448,NEGATIVE,0.99998
7,global stocks rise on higher white house stimu...,summary list placementulliglobal stocks rose o...,NEGATIVE,0.902753,NEGATIVE,0.999673
8,jpmorgan ceo jamie dimon called treasurys a po...,summary list placementullijpmorgans jamie dimo...,NEGATIVE,0.999912,NEGATIVE,0.999934
9,looking to invest in cryptocurrency this tradi...,these courses can help you trade cryptocurrency,POSITIVE,0.983302,POSITIVE,0.991646


In [9]:
# Persist the filtered frame, including its sentiment columns, to disk.
data.to_pickle("pickle/articles_sentiment_analysis.pkl")