# Pipelining for Stacked Ensemble Sentiment Analysis Model
# Fused Pipe

In [96]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [97]:
import flair
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from scipy.special import softmax
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer
import joblib
from sklearn import set_config
from sklearn import metrics
from sklearn.metrics import classification_report
import sklearn.metrics
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve



In [98]:
# Hugging Face checkpoint used by the RoBERTa-based sentiment scorer.
# (Plain string: the original f-string had no placeholders.)
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Tokenizer, config and sequence-classification model all come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
bert_model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Flair text classifier loaded under the 'en-sentiment' identifier.
flair_model = flair.models.TextClassifier.load('en-sentiment')

def process_flair(dialogue):
    """Return a signed Flair sentiment score for ``dialogue``.

    Runs the module-level ``flair_model`` on the text and reads the first
    label attached to the sentence.

    Args:
        dialogue: Text to score.

    Returns:
        The label's score when the label is ``'POSITIVE'``, the negated score
        when it is ``'NEGATIVE'``, and ``0.0`` when no label was attached or
        the label is anything else. (Previously those two cases raised
        ``IndexError`` or silently returned ``None``, which would put a
        non-numeric value into the feature array built downstream.)
    """
    sentence = flair.data.Sentence(dialogue)
    flair_model.predict(sentence)
    if not sentence.labels:
        # No prediction was attached to the sentence: treat it as neutral.
        return 0.0
    top_label = sentence.labels[0]
    if top_label.value == 'POSITIVE':
        return top_label.score
    if top_label.value == 'NEGATIVE':
        return -top_label.score
    # Unexpected label value: fall back to neutral instead of returning None.
    return 0.0



def return_sentiment(txt):
    """Return a signed score for ``txt`` from the module-level ``bert_model``.

    The text is tokenized with ``tokenizer``, the model's first output row is
    converted to probabilities with ``softmax``, and the class that sorts last
    among the scores (position 2 of ``argsort``) is taken as the winner.
    Assumes the model emits exactly three class scores -- TODO confirm.

    Returns:
        ``0`` when the winning class index is 1; otherwise
        ``(index - 1) * probability``, i.e. the negated probability for
        index 0 and the probability itself for index 2. Presumably the
        indices mean negative / neutral / positive -- verify against the
        checkpoint's label mapping.
    """
    model_inputs = tokenizer(txt, return_tensors='pt', padding=True, truncation=True)
    logits = bert_model(**model_inputs)[0][0].detach().numpy()
    probabilities = softmax(logits)
    top_class = np.argsort(probabilities)[2]
    if top_class == 1:
        return 0
    return (top_class - 1) * probabilities[top_class]
    
def tb_score(txt):
    """Return TextBlob's polarity for ``txt`` as a one-element Series keyed ``'tb'``."""
    polarity = TextBlob(txt).sentiment.polarity
    return pd.Series({'tb': polarity})

def _vader_analyzer():
    """Return a shared ``SentimentIntensityAnalyzer``, creating it on first use."""
    analyzer = getattr(_vader_analyzer, "_instance", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        _vader_analyzer._instance = analyzer
    return analyzer


def cal_vader_textblob_bert_flair(txt):
    """Score one text with the four base sentiment models.

    Args:
        txt: The text to score.

    Returns:
        A NumPy array with one row and four columns, in the order
        ``[vader, textblob, bert, flair]``:
        VADER's ``'compound'`` score, TextBlob's polarity, the value of
        ``return_sentiment(txt)`` and the value of ``process_flair(txt)``.
    """
    # Named textblob_score so the local no longer shadows the tb_score() function.
    textblob_score = TextBlob(txt).sentiment.polarity
    # Reuse one analyzer instead of constructing a new one for every text.
    vader_score = _vader_analyzer().polarity_scores(txt)['compound']
    flair_score = process_flair(txt)
    bert_score = return_sentiment(txt)
    return np.array([[vader_score, textblob_score, bert_score, flair_score]])



In [99]:
# Load the two previously saved meta-models (gradient boosting and logistic
# regression, going by the file names) that are fed the four base-model scores.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load these
# files from a trusted source.
fusedersational_gb_loaded_model = joblib.load('fused_gb_classifier.pkl')
fusedersational_lr_loaded_model = joblib.load('fused_logmodel.pkl')


In [100]:
fusedersational_lr_loaded_model.predict([[1,1,1,1]])

array([1])

In [101]:
func_tfmr = FunctionTransformer(func=cal_vader_textblob_bert_flair)

In [102]:
func_tfmr.transform("I love this pastry")

array([[0.6369    , 0.5       , 0.97106189, 0.99931455]])

In [103]:
# Text -> four base-model scores -> logistic-regression meta-model.
fused_LR_Pipe = Pipeline([
    ('base_models_scores', func_tfmr),
    ('meta_model_lr', fusedersational_lr_loaded_model),
])
# Same scoring step feeding the gradient-boosting meta-model. The final step
# is named 'meta_model_gb' so the step name matches the model it holds.
fused_GB_Pipe = Pipeline([
    ('base_models_scores', func_tfmr),
    ('meta_model_gb', fusedersational_gb_loaded_model),
])

In [110]:
fused_LR_Pipe.predict("I am so happy for the airline that my baggage was delayed by over 40 hours")

array([1])

# Creating Parallel Pipelines
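Here we wrap each meta-model in a small transformer so that it can be placed inside a `FeatureUnion`: the loaded models only expose prediction methods, so the wrapper provides `fit`/`transform` and returns the model's `predict_proba` output from `transform`.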

In [74]:
# This is the wrapper
class PredictionTransformer(BaseEstimator, TransformerMixin):
    """Adapter that exposes a wrapped model's ``predict_proba`` as ``transform``.

    Args:
        model: Object providing ``predict_proba(X)``; stored as ``self.model``.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the wrapped model is not fitted here."""
        return self

    def transform(self, X):
        """Return ``self.model.predict_proba(X)`` unchanged."""
        class_probabilities = self.model.predict_proba(X)
        return class_probabilities

In [75]:
# Rebuild both pipelines with the meta-model wrapped in PredictionTransformer,
# so that calling transform() on a pipeline yields the model's probabilities.
fused_LR_Pipe = Pipeline(steps=[
    ('base_models_scores', func_tfmr),
    ('meta_model_lr', PredictionTransformer(model=fusedersational_lr_loaded_model)),
])
fused_GB_Pipe = Pipeline(steps=[
    ('base_models_scores', func_tfmr),
    ('meta_model_gb', PredictionTransformer(model=fusedersational_gb_loaded_model)),
])

In [76]:
fused_LR_Pipe.transform("This is a tasty ice cream")

array([[0.05317613, 0.94682387]])

In [77]:
fused_GB_Pipe.transform("This is a tasty ice cream")

array([[0.1715332, 0.8284668]])

In [78]:
# Run both meta-model pipelines on the same input and combine their outputs:
# the logistic-regression pipeline first, then the gradient-boosting pipeline.
fused_parallel_pipe = FeatureUnion(transformer_list=[
    ('pipe_fused_lr', fused_LR_Pipe),
    ('pipe_fused_gb', fused_GB_Pipe),
])

In [79]:
set_config(display = "diagram")

In [80]:
fused_parallel_pipe

In [81]:
fused_parallel_pipe.transform("This is not a tasty ice cream")[0][1]

0.00017665754538126997

In [82]:
def fused_func(arr):
    """Turn the combined meta-model probabilities into a final label.

    Args:
        arr: 2-D indexable; only ``arr[0][0]`` and ``arr[0][1]`` are read.
            In this notebook it is the ``fused_parallel_pipe`` output, whose
            first two columns come from the logistic-regression pipeline.

    Returns:
        ``1`` when ``arr[0][1] >= arr[0][0]`` (ties included), else ``-1``.

    NOTE(review): any columns after the first two (the gradient-boosting
    pipeline's output here) never influence the result -- confirm this is
    intended rather than a leftover from the earlier averaging rule.
    """
    first_row = arr[0]
    # Only the first pair of values decides the label.
    return 1 if first_row[1] >= first_row[0] else -1


    

In [83]:
fused_final_func = FunctionTransformer(func=fused_func)

In [84]:
# End-to-end pipeline: text -> combined meta-model probabilities -> final label.
fused_final_pipeline = Pipeline(steps=[
    ('fused_overall_pipe', fused_parallel_pipe),
    ('fused_final_func', fused_final_func),
])

In [85]:
fused_final_pipeline.transform("This is not a tasty ice cream, this")

-1

In [86]:
set_config(display = "diagram")
fused_final_pipeline

In [87]:
fused_testdf = pd.read_csv('sst2_test_results.csv')

In [88]:
fused_testdf['Sentiment'].value_counts()

 1    1000
-1    1000
Name: Sentiment, dtype: int64

In [89]:
fused_testdf

Unnamed: 0.1,Unnamed: 0,Headline,Sentiment,vader,textblob,bert,flair,vader_polarity,textblob_polarity,bert_polarity,flair_polarity
0,0,to see a movie with its heart,1,0.0000,0.000000,0.000000,0.996216,1,1,1,1
1,1,so bad it does n't improve upon the experience...,-1,-0.7351,-0.350000,-0.927613,-0.999958,-1,-1,-1,-1
2,2,the reason to go see `` blue crush '' is the p...,1,-0.1531,0.250000,0.962698,0.999964,-1,1,1,1
3,3,impostor is a step down for director gary fled...,-1,0.0000,-0.155556,0.000000,-0.999793,1,-1,1,-1
4,4,satisfyingly scarifying,1,0.4404,0.500000,-0.672349,0.998684,1,1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...
1995,1995,"an undeniably moving film to experience ,",1,0.0000,0.000000,0.971453,0.999741,1,1,1,1
1996,1996,the compelling historical tale,1,0.2263,0.150000,0.791588,0.999016,1,1,1,1
1997,1997,the film buzz and whir ; very little of it,-1,0.0000,-0.243750,0.000000,-0.997583,1,-1,1,-1
1998,1998,", runteldat is something of a triumph .",1,0.4767,0.000000,0.940723,0.998238,1,1,1,1


In [90]:
fused_senti_output = []

In [91]:
# Score every headline with the final pipeline. The list is rebuilt from
# scratch so that re-running this cell does not append a second copy of the
# predictions to the existing list.
fused_senti_output = [
    fused_final_pipeline.transform(headline)
    for headline in fused_testdf['Headline']
]

In [92]:
fused_senti_output

[-1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 1,
 1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 1,
 1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 

In [93]:
print(classification_report(fused_testdf['Sentiment'],fused_senti_output))

              precision    recall  f1-score   support

          -1       0.67      1.00      0.80      1000
           1       1.00      0.50      0.67      1000

    accuracy                           0.75      2000
   macro avg       0.83      0.75      0.74      2000
weighted avg       0.83      0.75      0.74      2000



In [94]:
joblib.dump(fused_final_pipeline,'final_pipe.pkl')

['final_pipe.pkl']