In [1]:
!pip install -U sentence-transformers
!pip install flair

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch>=1.6.0
  Using cached torch-1.11.0-cp37-cp37m-manylinux1_x86_64.whl (750.6 MB)
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 1.10.2
    Uninstalling torch-1.10.2:
      Successfully uninstalled torch-1.10.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pyabsa 1.15.3 requires torch<1.11.0,>1.0.0, but you have torch 1.11.0 which is incompatible.[0m
Successfully installed torch-1.11.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
from flair.data import Sentence
from flair.models import SequenceTagger



class nlu_engine :
  '''
  Encoding models  : Models used for encoding ["all-mpnet-base-v2","all-mpnet-base-v1","all-roberta-large-v1","dmis-lab/biobert-base-cased-v1.1"]
  '''
  def __init__(self,encoding_model="all-mpnet-base-v2",summarizer_model="sshleifer/distilbart-cnn-12-6",max_length_summarizer=130,min_length_summarizer=40
               ,topic = ["medical","disease","sports","politics","weather"],flair_model="flair/ner-english-large"):
    self.topic = topic
    self.model = SentenceTransformer(encoding_model)
    self.summarizer_model = pipeline("summarization", model=summarizer_model)
    self.max_length_summarizer = max_length_summarizer
    self.min_length_summarizer = min_length_summarizer
    self.tagger = SequenceTagger.load(flair_model)

    self.classifier = pipeline("zero-shot-classification")

    task='sentiment'
    MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

    self.tokenizer = AutoTokenizer.from_pretrained(MODEL)

    # download label mapping
    labels=[]
    mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
    with urllib.request.urlopen(mapping_link) as f:
        html = f.read().decode('utf-8').split("\n")
        csvreader = csv.reader(html, delimiter='\t')
    self.labels = [row[1] for row in csvreader if len(row) > 1]

    # PT
    self.sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL)

  def encoding(self,text):
    embeddings = self.model.encode(text)
    return embeddings

  def summarizer(self,text):
    return self.summarizer_model(text, max_length=self.max_length_summarizer, min_length=self.min_length_summarizer, do_sample=False)

  def absa_sentiment(self,text):
    encoded_input = self.tokenizer(text, return_tensors='pt')
    output = self.sentiment_model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)

    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    result = []
    for i in range(scores.shape[0]):
        l = self.labels[ranking[i]]
        s = scores[ranking[i]]
        result.append([l,s])
    return result

  def topic_classification(self,text):

    x = self.classifier(text, self.topic)
    return [[i,j] for i,j in zip(x["labels"],x["scores"])]

  def ner_model(self,text):
    text = Sentence("George Washington went to Washington")

    # predict NER tags
    self.tagger.predict(text)

    # print sentence
    print(text)

    # print predicted NER spans
    print('The following NER tags are found:')
    # iterate over entities and print
    x=[]
    for entity in text.get_spans('ner'):
        x.append(entity)
    return x



In [3]:
obj = nlu_engine(encoding_model="all-mpnet-base-v2")



2022-06-22 08:43:24,096 loading file /root/.flair/models/ner-english-large/07301f59bb8cb113803be316267f06ddf9243cdbba92a4c8067ef92442d2c574.554244d3476d97501a766a98078421817b14654496b86f2f7bd139dc502a4f29
2022-06-22 08:44:03,119 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [4]:
obj.topic_classification("i want to play football")

[['sports', 0.9964057803153992],
 ['weather', 0.0010905396193265915],
 ['medical', 0.0010541524970903993],
 ['politics', 0.0007349809166043997],
 ['disease', 0.000714505382347852]]

In [5]:
obj.ner_model("george washington died of viagra overdoes")

Sentence: "George Washington went to Washington" → ["George Washington"/PER, "Washington"/LOC]
The following NER tags are found:


[Span[0:2]: "George Washington" → PER (1.0),
 Span[4:5]: "Washington" → LOC (1.0)]