In [3]:
#!pip install vaderSentiment
#!pip install pandarallel

In [4]:
import pandas as pd
import numpy as np
import os
import json
import glob
import datetime
import time
import pytz
#Load the SentimentIntensityAnalyzer object from the VADER package
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [5]:
from pandarallel import pandarallel
import multiprocessing

num_processors = multiprocessing.cpu_count()
print(f'Available CPUs: {num_processors}')

Available CPUs: 16


In [6]:
pandarallel.initialize(nb_workers=num_processors-1, use_memory_fs=False, progress_bar = True)

INFO: Pandarallel will run on 15 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [6]:
%%time
df = pd.read_parquet("data_topic.parquet")

CPU times: user 29.2 s, sys: 4.79 s, total: 34 s
Wall time: 24.3 s


In [7]:
df.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7


In [8]:
df.shape

(127739, 9)

## VADER

In [9]:
#Create a handle to the SentimentIntensityAnalyzer object
analyzer = SentimentIntensityAnalyzer()

In [10]:
def vader_score(text):
    score = analyzer.polarity_scores(text)
    return score['compound']

In [11]:
%%time
df['vader_score'] = df['main_text'].parallel_apply(vader_score)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8516), Label(value='0 / 8516'))), …

CPU times: user 6.27 s, sys: 3.5 s, total: 9.77 s
Wall time: 13min 20s


In [12]:
df['vader_sentiment'] = np.where(df['vader_score'].astype(np.float) > 0, 'Positive', 'Negative')
df['vader_sentiment'] = np.where(df['vader_score'].astype(np.float) == 0, 'Neutral', df['vader_sentiment'])

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  


In [13]:
df.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic,vader_score,vader_sentiment
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5,0.9954,Positive
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5,0.998,Positive
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5,0.9967,Positive
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0,0.9988,Positive
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7,0.9923,Positive


In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
table = pa.Table.from_pandas(df)
pq.write_table(table, './data_vader_score.parquet')

## SVM trained on Yelp data

In [15]:
import pickle
import pandas as pd

In [None]:
%%time
df = pd.read_parquet("data_vader_score.parquet")

CPU times: user 35.4 s, sys: 7.37 s, total: 42.8 s
Wall time: 44.4 s


In [None]:
df.head()

Unnamed: 0,date,title,text,main_text,Tokens,cleaned_tokens,vader_score,vader_sentiment
0,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...","[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",0.9954,Positive
1,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",0.998,Positive
2,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...","[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",0.9967,Positive
3,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0.9988,Positive
4,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",0.9923,Positive


In [16]:
with open('vect.pkl', 'rb') as f, open('svm_sentiment.pkl', 'rb') as model:
    vect = pickle.load(f)
    svm = pickle.load(model)

In [17]:
text = [' '.join(article) for article in df['cleaned_tokens'].tolist()]

In [18]:
%%time
y_pred = svm.predict(vect.transform(text))

CPU times: user 2min 23s, sys: 1.77 s, total: 2min 25s
Wall time: 2min 25s


In [19]:
df['svm_sentiment'] = y_pred
df.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic,vader_score,vader_sentiment,svm_sentiment
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5,0.9954,Positive,1
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5,0.998,Positive,0
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5,0.9967,Positive,0
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0,0.9988,Positive,1
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7,0.9923,Positive,0


In [20]:
import pyarrow as pa
import pyarrow.parquet as pq
table = pa.Table.from_pandas(df)
pq.write_table(table, './data_sentiment_topic.parquet')

## Customized Vader

In [6]:
new = pd.read_parquet('data_sentiment_topic.parquet')
new.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic,vader_score,vader_sentiment,svm_sentiment
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5,0.9954,Positive,1
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5,0.998,Positive,0
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5,0.9967,Positive,0
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0,0.9988,Positive,1
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7,0.9923,Positive,0


In [7]:
#Create a handle to the SentimentIntensityAnalyzer object
analyzer2 = SentimentIntensityAnalyzer()

In [8]:
#Update the analyzer with customized word list
pos = ['milestone', 'opportunity', 'growth', 'innovation', 'efficiency', 'productivity', 'competitiveness', 'prospects', 
       'sustainability', 'development', 'progress', 'transformation', 'digitization', 'smart', 'wisdom', 
       'optimization', 'data-driven', 'prediction', 'accuracy', 'high-quality', 'high-efficiency', 'cost-saving', 'safety', 
       'reliability', 'sustainability', 'future-oriented', 'leading', 'leader', 'advantage', 'forward-looking', 
       'exploration', 'creation', 'experimentation', 'discovery', 'breakthrough']

neg = ['unemployment', 'uncertainty', 'contraction', 'reduction', 'downsizing', 'bottleneck', 'challenge', 'obstacle', 'risk', 
       'insecurity', 'instability', 'unreliability', 'opacity', 'restriction', 'limitation', 'regression', 'stagnation', 
       'backwardness', 'failure', 'error', 'defect', 'problem', 'difficulty', 'negative', 'pessimistic', 'worry', 'fear', 
       'inadequacy', 'pressure', 'struggle', 'plight', 'crisis', 'frustration', 'disappointment', 'injury', 'hopelessness', 
       'pessimism', 'downward trend', 'stubbornness', 'conservatism', 'resistance', 'constraint', 'stagnation', 'inflexibility', 
       'outdated', 'slow', 'low-efficiency', 'low-quality', 'high-cost', 'adverse impact', 'loss', 'high risk', 'heavy burden', 
       'drag', 'high challenge', 'detrimental to human beings', 'opposition', 'confrontation']

analyzer2.lexicon.update({word: 1.0 for word in pos})
analyzer2.lexicon.update({word: -1.0 for word in neg})

In [9]:
def vader_score2(text):
    score = analyzer2.polarity_scores(text)
    return score['compound']

In [12]:
%%time
new['vader_score2'] = new['main_text'].parallel_apply(vader_score2)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8516), Label(value='0 / 8516'))), …

CPU times: user 6.1 s, sys: 3.56 s, total: 9.66 s
Wall time: 13min 29s


In [32]:
new['vader_sentiment2'] = np.where(new['vader_score2'].astype(np.float) > 0, 'Positive', 'Negative')
new['vader_sentiment2'] = np.where(new['vader_score2'].astype(np.float) == 0, 'Neutral', new['vader_sentiment'])

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  


In [33]:
new.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic,vader_score,vader_sentiment,svm_sentiment,vader_score2,textblob_sent,vader_sentiment2
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5,0.9954,Positive,1,0.9946,Positive,Positive
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5,0.998,Positive,0,0.9981,Positive,Positive
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5,0.9967,Positive,0,0.9975,Positive,Positive
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0,0.9988,Positive,1,0.9988,Positive,Positive
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7,0.9923,Positive,0,0.9943,Positive,Positive


## PatternAnalyzer from textblob

In [16]:
!pip install textblob

Collecting textblob
  Using cached textblob-0.17.1-py2.py3-none-any.whl (636 kB)
Collecting regex>=2021.8.3
  Using cached regex-2023.5.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (756 kB)
Installing collected packages: regex, textblob
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ktrain 0.37.0 requires cchardet, which is not installed.
ktrain 0.37.0 requires jieba, which is not installed.
ktrain 0.37.0 requires langdetect, which is not installed.
ktrain 0.37.0 requires syntok>1.3.3, which is not installed.
ktrain 0.37.0 requires whoosh, which is not installed.[0m[31m
[0mSuccessfully installed regex-2023.5.5 textblob-0.17.1


In [17]:
from textblob import TextBlob
from textblob.sentiments import PatternAnalyzer

In [18]:
patternanalyzer = PatternAnalyzer()

In [20]:
def textblob_sent(text):
    score = TextBlob(text, analyzer = patternanalyzer).sentiment.polarity
    
    if score > 0: return 'Positive'
    elif score < 0: return 'Negative'
    else: return 'Neutral'

In [21]:
%%time
new['textblob_sent'] = new['main_text'].parallel_apply(textblob_sent)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8516), Label(value='0 / 8516'))), …

CPU times: user 5.91 s, sys: 3.37 s, total: 9.28 s
Wall time: 1min 16s


In [23]:
new.head()

Unnamed: 0,url,date,title,text,main_text,relevant,Tokens,cleaned_tokens,ktrain_topic,vader_score,vader_sentiment,svm_sentiment,vader_score2,vader_sentiment2,textblob_sent
0,http://en.people.cn/n3/2021/0318/c90000-983012...,2021-03-18,Artificial intelligence improves parking effic...,\n\nArtificial intelligence improves parking e...,"Photo taken on July 1, 2019, shows a sign for ...",1,"[Photo, taken, on, July, 1, ,, 2019, ,, shows,...","[Photo, taken, July, show, sign, electronic, t...",5,0.9954,Positive,1,0.9946,Positive,Positive
1,http://newsparliament.com/2020/02/27/children-...,2020-02-27,Children With Autism Saw Their Learning and So...,\nChildren With Autism Saw Their Learning and ...,Children With Autism Saw Their Learning and So...,1,"[Children, With, Autism, Saw, Their, Learning,...","[Children, With, Autism, Saw, Their, Learning,...",5,0.998,Positive,0,0.9981,Positive,Positive
2,http://www.dataweek.co.za/12835r,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence ...","\n\nForget ML, AI and Industry 4.0 – obsolesce...","Forget ML, AI and Industry 4.0 – obsolescence ...",1,"[Forget, ML, ,, AI, and, Industry, 4.0, –, obs...","[Forget, ML, AI, Industry, obsolescence, focus...",5,0.9967,Positive,0,0.9975,Positive,Positive
3,http://www.homeoffice.consumerelectronicsnet.c...,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Gl...,\n\nStrategy Analytics: 71% of Smartphones Sol...,Strategy Analytics: 71% of Smartphones Sold Gl...,1,"[Strategy, Analytics, :, 71, %, of, Smartphone...","[Strategy, Analytics, Smartphones, Sold, Globa...",0,0.9988,Positive,1,0.9988,Positive,Positive
4,http://www.itbusinessnet.com/2020/10/olympus-t...,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Edu...,\n\nOlympus to Support Endoscopic AI Diagnosis...,Olympus to Support Endoscopic AI Diagnosis Edu...,1,"[Olympus, to, Support, Endoscopic, AI, Diagnos...","[Olympus, Support, Endoscopic, AI, Diagnosis, ...",7,0.9923,Positive,0,0.9943,Positive,Positive


In [37]:
import pyarrow as pa
import pyarrow.parquet as pq
table = pa.Table.from_pandas(new)
pq.write_table(table, './data_sentiment_topic.parquet')

## Transformer trained on financial news

In [19]:
import pickle 
import pandas as pd
from tqdm import tqdm

In [8]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch 

tokenizer = AutoTokenizer.from_pretrained("mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")

model = AutoModelForSequenceClassification.from_pretrained("mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")

In [12]:
%%time
with open('data_res.pkl', 'rb') as f:
    df = pickle.load(f)

CPU times: user 1min 14s, sys: 3.29 s, total: 1min 17s
Wall time: 1min 18s


In [13]:
def get_sentiment(text):
    
    inputs = tokenizer(text, padding = True, truncation = True, return_tensors = 'pt')
    logits = model(**inputs).logits
    
    predicted_class_id = logits.argmax().item()
    label = model.config.id2label[predicted_class_id]

    return label

In [None]:
%%time
bert_sentiment = []
for text in tqdm(df['main_text'].values):
    bert_sentiment.append(get_sentiment(text))

 61%|██████    | 78054/127739 [3:14:46<42:22, 19.54it/s]   

In [None]:
df['bert_sentiment'] = bert_sentiment

In [None]:
%%time
import pickle

with open('data_full.pkl', 'wb') as f:
    pickle.dump(df, f)

## result comparison

In [25]:
new['textblob_sent'].value_counts()

Positive    101876
Negative     18049
Neutral       7814
Name: textblob_sent, dtype: int64

In [27]:
new['vader_sentiment'].value_counts()

Positive    112679
Negative      9677
Neutral       5383
Name: vader_sentiment, dtype: int64

In [34]:
new['vader_sentiment2'].value_counts()

Positive    112674
Negative      9635
Neutral       5430
Name: vader_sentiment2, dtype: int64

In [35]:
new['svm_sentiment'].value_counts()

0    86619
1    41120
Name: svm_sentiment, dtype: int64