# Pipelining for Stacked Ensemble Sentiment Analysis Model

In [81]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [196]:
import flair
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from scipy.special import softmax
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer
import joblib
from sklearn import set_config
from sklearn import metrics
from sklearn.metrics import classification_report
import sklearn.metrics
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve



In [83]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
bert_model = AutoModelForSequenceClassification.from_pretrained(MODEL)
flair_model = flair.models.TextClassifier.load('en-sentiment')

def process_flair(dialogue):   # returning the flair score
    sentence = flair.data.Sentence(dialogue)
    flair_model.predict(sentence)
    label = sentence.labels[0].value
    score = sentence.labels[0].score
    if label == 'POSITIVE':
        return score
    elif label == 'NEGATIVE':
        return -score



def return_sentiment(txt):  # returning single-sentence BERT score
    encoded_input = tokenizer(txt, return_tensors='pt',padding=True,truncation=True)
    output = bert_model(**encoded_input)
    score = output[0][0].detach().numpy() 
    scores = softmax(score)
    if np.argsort(scores)[2] == 1:
        return 0
    else:
        return (np.argsort(scores)[2]-1)*scores[np.argsort(scores)[2]]
    
def tb_score(txt):
    sen = TextBlob(txt)
    return pd.Series({'tb': sen.sentiment.polarity})

def cal_vader_textblob_bert_flair(txt):
    tb_score = TextBlob(txt).sentiment.polarity
    obj = SentimentIntensityAnalyzer()
    vader_score = obj.polarity_scores(txt)['compound']
    flair_score = process_flair(txt)
    bert_score = return_sentiment(txt)
    #prob = logmodel3.predict_proba([[tb_score,vader_score,flair_score,bert_score]])[0]      
    return np.array([[vader_score, tb_score,bert_score,flair_score]])



In [84]:
# Load the trained model (replace 'trained_model.pkl' with your model file)
conversational_gb_loaded_model = joblib.load('conversational_gb_sav_model.pkl')
conversational_lr_loaded_model = joblib.load('conversational_lr_sav_model.pkl')


In [85]:
conversational_lr_loaded_model.predict([[1,1,1,1]])

array([1])

In [86]:
func_tfmr = FunctionTransformer(func=cal_vader_textblob_bert_flair)

In [87]:
func_tfmr.transform("I love this pastry")

array([[0.6369    , 0.5       , 0.97106189, 0.99931455]])

In [88]:
Conv_LR_Pipe = Pipeline ([
    ('base_models_scores',func_tfmr),
    ('meta_model_lr',conversational_lr_loaded_model)
])
Conv_GB_Pipe = Pipeline ([
    ('base_models_scores',func_tfmr),
    ('meta_model_lr',conversational_gb_loaded_model)
])

In [89]:
Conv_LR_Pipe.predict("I just very much dislike hate kill this pastry")

array([-1])

# Creating Parallel Pipelines
Here we make use of a wrapper to enable Fit Transform since our model is only predict

In [148]:
# This is the wrapper
class PredictionTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, model):
        self.model = model
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        return self.model.predict_proba(X)

In [124]:
Conv_LR_Pipe = Pipeline ([
    ('base_models_scores',func_tfmr),
    ('meta_model_lr',PredictionTransformer(model =conversational_lr_loaded_model ))
])
Conv_GB_Pipe = Pipeline ([
    ('base_models_scores',func_tfmr),
    ('meta_model_gb',PredictionTransformer(model =conversational_gb_loaded_model ))
])

In [134]:
Conv_LR_Pipe.transform("This is a tasty ice cream")

array([[0.04317196, 0.95682804]])

In [135]:
Conv_GB_Pipe.transform("This is a tasty ice cream")

array([[0.24905813, 0.75094187]])

In [125]:
conv_parallel_pipe = FeatureUnion([
    ('pipe_conv_lr', Conv_LR_Pipe),
    ('pipe_conv_gb',Conv_GB_Pipe)
])

In [126]:
set_config(display = "diagram")

In [127]:
conv_parallel_pipe

In [156]:
conv_parallel_pipe.transform("This is a tasty ice cream")

In [248]:
def conv_func(arr):
    negative = (arr[0][0]+arr[0][2])/2
    positive = (arr[0][1]+arr[0][3])/2
    #if positive >= negative:
    #    return 1
    #else:
    #    return -1
    if arr[0][1]>=arr[0][1]:
        return 1
    else:
        return -1


    

In [249]:
conv_final_func = FunctionTransformer(func=conv_func)

In [250]:
Conv_final_pipeline = Pipeline([
    ('conv_overall_pipe',conv_parallel_pipe),
    ('conv_final_func', conv_final_func)
])

In [251]:
Conv_final_pipeline.transform("should I be considering so many deaths today")

1

In [252]:
set_config(display = "diagram")
Conv_final_pipeline

In [253]:
conv_testdf = pd.read_csv('conversational_test.csv')

In [254]:
conv_testdf['Sentiment'].value_counts()

 1    1000
-1    1000
Name: Sentiment, dtype: int64

In [255]:
conv_testdf

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Headline,Sentiment
0,0,50368,"Well , the life style is much more relaxed th...",1
1,1,75655,I have a complaint to make . I've just been ba...,-1
2,2,43391,"Yes , orange juice will be fine for me . But ...",1
3,3,18291,Your best isn't good enough . Back in my day ...,-1
4,4,42665,"Happy Women's Day . I love you forever , Mum .",1
...,...,...,...,...
1995,1995,6702,Thank you .,1
1996,1996,44170,"Oh , Mary , come in , please . I'm so happy to...",1
1997,1997,37295,I hate to see the abuse of animals .,-1
1998,1998,12770,"Happy birthday , John . Many happy returns of ...",1


In [256]:
conv_senti_output = []

In [257]:
for i in range (len(conv_testdf)):
    senti = Conv_final_pipeline.transform(conv_testdf['Headline'].iloc[i])
    conv_senti_output.append(senti)

In [258]:
conv_senti_output

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [259]:
print(classification_report(conv_testdf['Sentiment'],conv_senti_output))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00      1000
           1       0.50      1.00      0.67      1000

    accuracy                           0.50      2000
   macro avg       0.25      0.50      0.33      2000
weighted avg       0.25      0.50      0.33      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
