## Environment Setup and Data Loading

In [3]:
import pandas as pd
import numpy as np

In [4]:
import multiprocessing

num_processors = multiprocessing.cpu_count()
num_processors

16

In [6]:
from pandarallel import pandarallel

# Initialize pandarallel.
# Leave one core free for the main process, but never request fewer than one
# worker: cpu_count() - 1 evaluates to 0 on a single-core machine.
n_workers = max(1, multiprocessing.cpu_count() - 1)
pandarallel.initialize(nb_workers=n_workers, use_memory_fs=False, progress_bar=True)

INFO: Pandarallel will run on 15 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [37]:
df_filtered = pd.read_parquet('filtered_data.parquet', engine='pyarrow')

In [5]:
df_filtered.head(1)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...


In [6]:
df_filtered.shape

(165530, 5)

## Sentiment Analysis with VADER

In [25]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

#### With Full Data

In [12]:
df_filtered.shape

(165530, 5)

In [13]:
%%time
analyzer = SentimentIntensityAnalyzer()

# Score every article in parallel; each result is a dict of VADER scores
# (the 'neg', 'neu', 'pos' and 'compound' columns seen after the merge below).
vader_scores = df_filtered['clean_text'].parallel_apply(analyzer.polarity_scores)

# Expand the dicts into columns. Reuse df_filtered's index explicitly: building
# the frame from a plain list would otherwise reset it to 0..n-1, and the
# index-based merge below would then only line up if df_filtered happened to
# carry that same default index.
# NOTE(review): assumes df_filtered's index labels are unique.
sentiment_vader = pd.DataFrame(vader_scores.tolist(), index=df_filtered.index)

# Create a positive / negative / neutral sentiment variable from the compound score
compound = sentiment_vader['compound'].astype(float)
sentiment_vader['sentiment'] = np.where(
    compound > 0,
    'Positive',
    np.where(compound == 0, 'Neutral', 'Negative'),
)

# Merge Vader values with Sentiments
df_filtered = pd.merge(df_filtered, sentiment_vader, left_index=True, right_index=True)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8713), Label(value='0 / 8713'))), …

CPU times: total: 8.92 s
Wall time: 10min 6s


In [14]:
df_filtered.sentiment.value_counts()

Positive    145672
Negative     15499
Neutral       4359
Name: sentiment, dtype: int64

In [15]:
df_filtered = df_filtered.drop(['neg', 'neu', 'pos'], axis = 1)

In [16]:
df_filtered = df_filtered.rename(columns={'compound': 'sentiment_score_vader', 'sentiment': 'sentiment_vader'})
df_filtered.head(1)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive


In [17]:
# save filtered df
df_filtered.to_parquet('filtered_data_sentiment.parquet', engine='pyarrow')

## Sentiment Analysis with SVM Fine Tuned with Yelp

In [18]:
df_filtered = pd.read_parquet('filtered_data_sentiment.parquet', engine='pyarrow')

In [19]:
df_filtered.head()

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive
2,2021-03-26,www.dataweek.co.za,"Forget ML, AI and Industry 4.0",*mar21-abi Many decision-makers only realise t...,many realise problem hand regret following tag...,0.9965,Positive
3,2021-03-10,www.homeoffice.consumerelectronicsnet.com,Strategy Analytics: 71% of Smartphones Sold Gl...,BOSTON–(BUSINESS WIRE)–Strategy Analytics in a...,business wire analytics newly published report...,0.9991,Positive
4,2020-10-20,www.itbusinessnet.com,Olympus to Support Endoscopic AI Diagnosis Edu...,"TOKYO, Oct 20, 2020 – (ACN Newswire) – Olympus...",tokyo oct acn newswire olympus corporation too...,0.9936,Positive


In [20]:
import pickle


def _load_pickle(path):
    """Open `path` in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute arbitrary code; only load files you trust.
# load vectorizer
vect = _load_pickle('yelp_sentiment_vect.pkl')

# load svm
svm = _load_pickle('yelp_sentiment_svm.pkl')

In [21]:
def sentiment_prediction_svm(text):
    """Predict sentiment with the notebook-level SVM.

    `text` is vectorized with the notebook-level `vect` and the resulting
    features are handed to `svm.predict`, whose output is returned unchanged.
    In this notebook it is called with a list of article texts.
    """
    features = vect.transform(text)
    return svm.predict(features)

In [22]:
sentiment_svm = sentiment_prediction_svm(df_filtered.clean_text.to_list())

In [23]:
df_filtered['sentiment_svm'] = sentiment_svm

In [24]:
df_filtered.sentiment_svm.value_counts()

0    104652
1     60878
Name: sentiment_svm, dtype: int64

In [25]:
df_filtered.head()

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive,0
2,2021-03-26,www.dataweek.co.za,"Forget ML, AI and Industry 4.0",*mar21-abi Many decision-makers only realise t...,many realise problem hand regret following tag...,0.9965,Positive,0
3,2021-03-10,www.homeoffice.consumerelectronicsnet.com,Strategy Analytics: 71% of Smartphones Sold Gl...,BOSTON–(BUSINESS WIRE)–Strategy Analytics in a...,business wire analytics newly published report...,0.9991,Positive,1
4,2020-10-20,www.itbusinessnet.com,Olympus to Support Endoscopic AI Diagnosis Edu...,"TOKYO, Oct 20, 2020 – (ACN Newswire) – Olympus...",tokyo oct acn newswire olympus corporation too...,0.9936,Positive,0


In [26]:
# save filtered df
df_filtered.to_parquet('filtered_data_sentiment.parquet', engine='pyarrow')

## Sentiment Analysis with Logistic Regression Fine Tuned with Yelp

In [3]:
import pickle
# load the vectorizer paired with the logistic regression model
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('yelp_sentiment_vect_logreg.pkl', 'rb') as f:
    vect_logreg = pickle.load(f)

# load the logistic regression model
with open('yelp_sentiment_logreg.pkl', 'rb') as f:
    logreg = pickle.load(f)

In [7]:
import pandas as pd
df_filtered = pd.read_parquet('filtered_data_sentiment.parquet', engine='pyarrow')
df_filtered.head()

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive,0
2,2021-03-26,www.dataweek.co.za,"Forget ML, AI and Industry 4.0",*mar21-abi Many decision-makers only realise t...,many realise problem hand regret following tag...,0.9965,Positive,0
3,2021-03-10,www.homeoffice.consumerelectronicsnet.com,Strategy Analytics: 71% of Smartphones Sold Gl...,BOSTON–(BUSINESS WIRE)–Strategy Analytics in a...,business wire analytics newly published report...,0.9991,Positive,1
4,2020-10-20,www.itbusinessnet.com,Olympus to Support Endoscopic AI Diagnosis Edu...,"TOKYO, Oct 20, 2020 – (ACN Newswire) – Olympus...",tokyo oct acn newswire olympus corporation too...,0.9936,Positive,0


In [8]:
def sentiment_prediction_logreg(text):
    """Predict sentiment with the notebook-level logistic regression model.

    `text` is vectorized with the notebook-level `vect_logreg` and the
    resulting features are handed to `logreg.predict`, whose output is
    returned unchanged. In this notebook it is called with a list of article
    texts.
    """
    features = vect_logreg.transform(text)
    return logreg.predict(features)

In [9]:
sentiment_logreg = sentiment_prediction_logreg(df_filtered.clean_text.to_list())
df_filtered['sentiment_logreg'] = sentiment_logreg

In [11]:
df_filtered.head()

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_logreg
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive,0,0
2,2021-03-26,www.dataweek.co.za,"Forget ML, AI and Industry 4.0",*mar21-abi Many decision-makers only realise t...,many realise problem hand regret following tag...,0.9965,Positive,0,1
3,2021-03-10,www.homeoffice.consumerelectronicsnet.com,Strategy Analytics: 71% of Smartphones Sold Gl...,BOSTON–(BUSINESS WIRE)–Strategy Analytics in a...,business wire analytics newly published report...,0.9991,Positive,1,1
4,2020-10-20,www.itbusinessnet.com,Olympus to Support Endoscopic AI Diagnosis Edu...,"TOKYO, Oct 20, 2020 – (ACN Newswire) – Olympus...",tokyo oct acn newswire olympus corporation too...,0.9936,Positive,0,0


In [12]:
df_filtered.sentiment_logreg.value_counts()

0    104714
1     60816
Name: sentiment_logreg, dtype: int64

In [13]:
df_filtered.sentiment_svm.value_counts()

0    104652
1     60878
Name: sentiment_svm, dtype: int64

## Sentiment Analysis with BERT Fine-Tuned on Yelp

In [9]:
# !pip install ktrain

In [10]:
import ktrain
from ktrain import text

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [11]:
text.print_text_classifiers()

fasttext: a fastText-like model [http://arxiv.org/pdf/1607.01759.pdf]
logreg: logistic regression using a trainable Embedding layer
nbsvm: NBSVM model [http://www.aclweb.org/anthology/P12-2018]
bigru: Bidirectional GRU with pretrained fasttext word vectors [https://fasttext.cc/docs/en/crawl-vectors.html]
standard_gru: simple 2-layer GRU with randomly initialized embeddings
bert: Bidirectional Encoder Representations from Transformers (BERT) from keras_bert [https://arxiv.org/abs/1810.04805]
distilbert: distilled, smaller, and faster BERT from Hugging Face transformers [https://arxiv.org/abs/1910.01108]


In [12]:
predictor = ktrain.load_predictor('predictor')



In [15]:
%%time
df_filtered = pd.read_pickle('filtered_data_sentiment_NER.pkl')
df_filtered.head(1)

CPU times: user 29.7 s, sys: 2.47 s, total: 32.2 s
Wall time: 32.2 s


Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_score_vader_customized,sentiment_vader_customized,NER_word,ORG_NER,PERSON_NER,GPE_NER,sentiment_ner
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.9961,Positive,"[[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry...","[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry ...","[Xiang Yanping, Xiang]","[China, Beijing, Chaoyang, Beijing, Beijing, B...",E...


In [16]:
%%time
sentiment_bert_yelp = predictor.predict(df_filtered.clean_text.tolist())

CPU times: user 2d 7h 32min 20s, sys: 3h 22min 53s, total: 2d 10h 55min 14s
Wall time: 5h 39min 5s


In [22]:
df_filtered['sentiment_bert'] = sentiment_bert_yelp

In [26]:
df_filtered.head(2)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_score_vader_customized,sentiment_vader_customized,NER_word,ORG_NER,PERSON_NER,GPE_NER,sentiment_ner,sentiment_bert
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.9961,Positive,"[[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry...","[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry ...","[Xiang Yanping, Xiang]","[China, Beijing, Chaoyang, Beijing, Beijing, B...",E...,Positive
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive,0,0.9987,Positive,"[[Kiwi, the University of Southern California,...","[Kiwi, the University of Southern California, ...","[Levi Strauss, Kiwi, Kiwi, Kiwi, Shomik Jain, ...","[Explode, U.S., Explode, U.S., U.S., South Car...",...,Negative


In [25]:
df_filtered['sentiment_bert'].value_counts()

Positive    115731
Negative     49799
Name: sentiment_bert, dtype: int64

In [27]:
df_filtered.to_pickle('filtered_data_sentiment_NER.pkl')

## Sentiment Analysis with Customized Vader Lexicon (news level)

In [38]:
df_filtered = pd.read_parquet('filtered_data_sentiment.parquet')
df_filtered.head()

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0
1,2020-02-27,newsparliament.com,Children With Autism Saw Their Learning and So...,Coronavirus Could Explode in the U.S. Overnigh...,coronavirus could explode overnight like italy...,0.9988,Positive,0
2,2021-03-26,www.dataweek.co.za,"Forget ML, AI and Industry 4.0",*mar21-abi Many decision-makers only realise t...,many realise problem hand regret following tag...,0.9965,Positive,0
3,2021-03-10,www.homeoffice.consumerelectronicsnet.com,Strategy Analytics: 71% of Smartphones Sold Gl...,BOSTON–(BUSINESS WIRE)–Strategy Analytics in a...,business wire analytics newly published report...,0.9991,Positive,1
4,2020-10-20,www.itbusinessnet.com,Olympus to Support Endoscopic AI Diagnosis Edu...,"TOKYO, Oct 20, 2020 – (ACN Newswire) – Olympus...",tokyo oct acn newswire olympus corporation too...,0.9936,Positive,0


In [30]:
customized_analyzer = SentimentIntensityAnalyzer()

# Terms assigned a valence of +1.0 in the customized lexicon.
positive_terms = [
    'breakthrough', 'innovation', 'scalability', 'streamlining', 'exploration',
    'flexible', 'creation', 'sustainable', 'unleash', 'dynamic', 'empower',
    'proactive', 'responsive', 'collaborative', 'integrated', 'personalization',
    'inclusive', 'advancement', 'profitability', 'outperform', 'exceed',
    'tailored', 'seamless', 'leverage', 'enhancement', 'unprecedented',
    'revolutionary', 'groundbreaking', 'mining', 'opportunity', 'growth',
    'automation', 'efficiency', 'productivity', 'competitiveness', 'prospects',
    'development', 'progress', 'transformation', 'intelligence', 'digitization',
    'smart', 'wisdom', 'optimization', 'data-driven', 'accuracy', 'high-quality',
    'high-efficiency', 'cost-saving', 'safety', 'reliability', 'leading',
    'leader', 'competitor', 'advantage', 'accelerate', 'excel', 'reinvent',
    'simplify', 'maximize', 'enrich', 'facilitate', 'inspire', 'ignite',
    'cultivate', 'reliable', 'diversify', 'monetize', 'empowering',
    'interoperability', 'ingenuity', 'visionary', 'meticulous', 'proliferation',
    'synergize', 'enhance', 'adaptive', 'revitalize', 'rejuvenate', 'spearhead',
]

# Terms assigned a valence of -1.0 in the customized lexicon.
# NOTE(review): entries containing a space ('high risk', 'heavy burden',
# 'high challenge', 'downward trend') may never match if the analyzer looks up
# one token at a time -- confirm against vaderSentiment's tokenization.
negative_terms = [
    'disruptive', 'detrimental', 'compromise', 'vulnerable', 'intrusive',
    'exploitation', 'overdependence', 'oversimplification', 'misinterpretation',
    'bias', 'unethical', 'exclusion', 'distrust', 'alienation', 'fraudulent',
    'counterfeit', 'substandard', 'obsolete', 'impersonal', 'inefficient',
    'loss', 'high risk', 'heavy burden', 'high challenge', 'opposition',
    'stagnation', 'inflexibility', 'unemployment', 'uncertainty', 'contraction',
    'reduction', 'downsizing', 'bottleneck', 'challenge', 'obstacle', 'risk',
    'insecurity', 'instability', 'unreliability', 'restriction', 'limitation',
    'regression', 'backwardness', 'failure', 'defect', 'problem', 'difficulty',
    'negative', 'pessimistic', 'worry', 'fear', 'inadequacy', 'pressure',
    'struggle', 'plight', 'crisis', 'frustration', 'disappointment', 'injury',
    'hopelessness', 'pessimism', 'downward trend', 'stubbornness',
    'conservatism', 'resistance', 'constraint', 'outdated', 'slow',
    'low-efficiency', 'low-quality', 'high-cost',
]

# update() overwrites any valence the stock lexicon already holds for a term.
# NOTE(review): presumably some of these words already exist in the default
# lexicon with different scores -- confirm that replacing them is intended.
customized_analyzer.lexicon.update(dict.fromkeys(positive_terms, 1.0))
customized_analyzer.lexicon.update(dict.fromkeys(negative_terms, -1.0))


#### News Level

In [39]:
%%time
# Score every article in parallel with the customized lexicon; each result is a
# dict of VADER scores (the 'neg', 'neu', 'pos' and 'compound' columns seen
# after the merge below).
vader_scores = df_filtered['clean_text'].parallel_apply(customized_analyzer.polarity_scores)

# Expand the dicts into columns. Reuse df_filtered's index explicitly: building
# the frame from a plain list would otherwise reset it to 0..n-1, and the
# index-based merge below would then only line up if df_filtered happened to
# carry that same default index.
# NOTE(review): assumes df_filtered's index labels are unique.
sentiment_vader = pd.DataFrame(vader_scores.tolist(), index=df_filtered.index)

# Create a positive / negative / neutral sentiment variable from the compound score
compound = sentiment_vader['compound'].astype(float)
sentiment_vader['sentiment'] = np.where(
    compound > 0,
    'Positive',
    np.where(compound == 0, 'Neutral', 'Negative'),
)

# Merge Vader values with Sentiments
df_filtered = pd.merge(df_filtered, sentiment_vader, left_index=True, right_index=True)
df_filtered.head(1)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8713), Label(value='0 / 8713'))), …

CPU times: total: 7.7 s
Wall time: 14min 19s


Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,neg,neu,pos,compound,sentiment
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.026,0.843,0.131,0.9961,Positive


In [40]:
df_filtered.head(1)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,neg,neu,pos,compound,sentiment
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.026,0.843,0.131,0.9961,Positive


In [41]:
df_filtered = df_filtered.drop(['neg', 'neu', 'pos'], axis = 1)

In [42]:
df_filtered = df_filtered.rename(columns={'compound': 'sentiment_score_vader_customized', 'sentiment': 'sentiment_vader_customized'})
df_filtered.head(1)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_score_vader_customized,sentiment_vader_customized
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.9961,Positive


In [44]:
df_filtered.sentiment_vader_customized.value_counts()

Positive    146582
Negative     15307
Neutral       3641
Name: sentiment_vader_customized, dtype: int64

In [45]:
df_filtered.sentiment_vader.value_counts()

Positive    145672
Negative     15499
Neutral       4359
Name: sentiment_vader, dtype: int64

In [46]:
df_filtered.to_parquet('filtered_data_sentiment.parquet')

## Sentiment Analysis with RoBERTa

In [7]:
# !pip install transformers

In [44]:
# !pip uninstall -y torch

In [46]:
# !pip install torch torchvision torchaudio

In [7]:
from tqdm import tqdm
tqdm.pandas()

In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [9]:
import torch, torchvision, torchaudio
print(torch.__version__)

2.0.1+cu117


In [13]:
# Checkpoint id shared by the tokenizer and the classification model.
checkpoint = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [None]:
# !pip uninstall -y ipywidgets

In [49]:
# !pip install ipywidgets==7.5.1 --upgrade

In [10]:
%%time
df_filtered = pd.read_pickle('filtered_data_sentiment_NER.pkl')
df_filtered.head(1)

CPU times: user 27.2 s, sys: 4.37 s, total: 31.6 s
Wall time: 42.3 s


Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_score_vader_customized,sentiment_vader_customized,NER_word,ORG_NER,PERSON_NER,GPE_NER,sentiment_ner
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.9961,Positive,"[[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry...","[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry ...","[Xiang Yanping, Xiang]","[China, Beijing, Chaoyang, Beijing, Beijing, B...",E...


In [14]:
def get_sentiment(text):
    """Classify one text with the notebook-level `tokenizer` and `model`.

    Parameters
    ----------
    text : str
        A single document. It is tokenized with ``truncation=True`` and
        returned as PyTorch tensors.

    Returns
    -------
    str
        The entry of ``model.config.id2label`` for the highest-scoring class.

    Notes
    -----
    Intended for one text per call (it is applied row by row in this
    notebook). Passing several texts at once makes ``.item()`` fail rather
    than silently returning a label for the wrong row.
    """
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')

    # Inference only: disable autograd so no gradient graph is built and kept
    # alive for each of the many per-row calls.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Take the argmax over the class dimension instead of the flattened tensor.
    predicted_class_id = logits.argmax(dim=-1).item()
    return model.config.id2label[predicted_class_id]

In [15]:
df_filtered['sentiment_robert'] = df_filtered.clean_text.progress_apply(get_sentiment)

100%|██████████| 165530/165530 [6:09:56<00:00,  7.46it/s]  


In [None]:
df_filtered.sentiment_robert.value_counts()

neutral     88726
positive    67865
negative     8939
Name: sentiment_robert, dtype: int64

In [None]:
df_filtered.head(1)

Unnamed: 0,date,domain,clean_title,clean_text,clean_token,sentiment_score_vader,sentiment_vader,sentiment_svm,sentiment_score_vader_customized,sentiment_vader_customized,NER_word,ORG_NER,PERSON_NER,GPE_NER,sentiment_ner,sentiment_robert
0,2021-03-18,en.people.cn,Artificial intelligence improves parking effic...,Thanks to the application of an artificial int...,thanks application artificial intelligence ai ...,0.997,Positive,0,0.9961,Positive,"[[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry...","[ETC, ETC, AI, Wang, ETC, Wang, ETC, Ministry ...","[Xiang Yanping, Xiang]","[China, Beijing, Chaoyang, Beijing, Beijing, B...",E...,positive


In [None]:
df_filtered.to_pickle('filtered_data_sentiment.pkl')

In [19]:
df_sentiment_robert = df_filtered['sentiment_robert'].to_frame()

In [21]:
df_sentiment_robert.to_pickle("sentiment_robert.pkl")

In [22]:
df_sentiment_robert.head()

Unnamed: 0,sentiment_robert
0,positive
1,neutral
2,negative
3,positive
4,neutral
