In [1]:
# Clear the interactive namespace before running the notebook; -f skips the confirmation prompt.
%reset -f

In [2]:
import warnings
# Suppress every Python warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides deprecation warnings from libraries; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [3]:
import matplotlib.pyplot as plt
# Apply matplotlib's 'seaborn-v0_8' style sheet to subsequent figures.
plt.style.use('seaborn-v0_8')
import seaborn as sns

# Hugging Face factory used below to build the zero-shot classifier.
from transformers import pipeline

In [4]:
# Build the zero-shot classification pipeline on top of the
# 'roberta-large-mnli' checkpoint; every later cell reuses this object.
sentimentClassifier = pipeline(
    task='zero-shot-classification',
    model='roberta-large-mnli',
)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Basic Text Classification
The model classifies each text segment in a list using user-provided candidate labels.

In [5]:
# Label each text segment as "threat" or "not threat" and print the winning label.

candidateLabels = ['threat', 'not threat']

listTextSegments = [
    'I will help you with your yard work',
    'I will kill you if you tell anybody',
]

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Pair each label with its score and rank the pairs from highest to lowest score.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I will help you with your yard work
('not threat', 0.8320251107215881)


I will kill you if you tell anybody
('threat', 0.9927989840507507)



In [6]:
# Label each text segment as "good" or "bad" to check whether the model understands slang.

candidateLabels = ['good', 'bad']

listTextSegments = [
    'I aced the test',
    'I bombed the test',
]

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Pair each label with its score and rank the pairs from highest to lowest score.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I aced the test
('good', 0.9606329798698425)


I bombed the test
('bad', 0.9810301661491394)



### Testing Various Candidate Labels
Given a fixed list of text segments, test the model's robustness to candidate labels that are worded differently but have similar meanings.

In [7]:
# Shared inputs for the candidate-label comparisons in the cells that follow.
listTextSegments = [
    'I will kill the patient',
    'I will heal the patient',
]

In [8]:
# Label pair under test: "dangerous" versus "not dangerous".
candidateLabels = ['dangerous', 'not dangerous']

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Rank (label, score) pairs by score, highest first, and report the top one.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I will kill the patient
('dangerous', 0.9822070598602295)


I will heal the patient
('not dangerous', 0.9070127606391907)



In [9]:
# Label pair under test: "good" versus "bad".
candidateLabels = ['good', 'bad']

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Rank (label, score) pairs by score, highest first, and report the top one.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I will kill the patient
('bad', 0.9402111172676086)


I will heal the patient
('good', 0.9236281514167786)



In [10]:
# Label pair under test: "malicious" versus "benign".
candidateLabels = ['malicious', 'benign']

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Rank (label, score) pairs by score, highest first, and report the top one.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I will kill the patient
('malicious', 0.9815500378608704)


I will heal the patient
('benign', 0.7874883413314819)



In [11]:
# Label pair under test: "bad intention" versus "good intention".
candidateLabels = ['bad intention', 'good intention']

res = sentimentClassifier(listTextSegments, candidate_labels=candidateLabels)

for prediction in res:
    text = prediction['sequence']
    # Rank (label, score) pairs by score, highest first, and report the top one.
    ranked = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )
    topPair = ranked[0]
    print('\n{}\n{}\n'.format(text, topPair))


I will kill the patient
('bad intention', 0.974708616733551)


I will heal the patient
('good intention', 0.9571693539619446)



### Use for Sentiment Extraction
The model can be used to extract sentiments from segments of text by providing appropriate candidate labels

In [12]:
# Sentiment classes to score; the reporting cell looks up these exact strings as dictionary keys.
candidateLabels = ['positive', 'negative', 'neutral']

In [13]:
# One example sentence for each expected sentiment: positive, negative, neutral.
listTextSegments = [
    'This is so good',
    'This is not good',
    'Water is odorless',
]

In [14]:
# Score every text segment against all candidate sentiment labels.
# Fix: the keyword was misspelled `max_lenght`; it is now `max_length`.
# NOTE(review): the zero-shot pipeline may not forward tokenizer options such as
# max_length/truncation at call time -- confirm they take effect, or drop them.
res = sentimentClassifier(
    listTextSegments,
    candidate_labels=candidateLabels,
    max_length=512,
    truncation=False,
)

for prediction in res:
    text = prediction['sequence']
    # Map each label to its score rounded to 4 decimals, so scores can be
    # looked up by name regardless of the order the pipeline returns them in.
    dictRes = {
        label: round(score, 4)
        for label, score in zip(prediction['labels'], prediction['scores'])
    }
    print('\nTEXT: {}\n   POSITIVE: {}\n   NEGATIVE: {}\n   NEUTRAL: {}\n'.format(
        text, dictRes['positive'], dictRes['negative'], dictRes['neutral']))


TEXT: This is so good
   POSITIVE: 0.978
   NEGATIVE: 0.0018
   NEUTRAL: 0.0201


TEXT: This is not good
   POSITIVE: 0.0024
   NEGATIVE: 0.9927
   NEUTRAL: 0.0049


TEXT: Water is odorless
   POSITIVE: 0.1015
   NEGATIVE: 0.0301
   NEUTRAL: 0.8684

