<a href="https://colab.research.google.com/github/VertaAI/modeldb/blob/master/client/workflows/demos/distilbert-sentiment-classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
# Connection settings for the Verta client, exported as environment variables.
# The right-hand sides below are blank placeholders: supply a string value for
# each one before running this cell (as written, the cell is not valid Python).
os.environ['VERTA_HOST'] = 
os.environ['VERTA_EMAIL'] = 
os.environ['VERTA_DEV_KEY'] = 

In [2]:
# Install the packages this notebook imports (verta, transformers) plus torch.
!pip install verta torch transformers

# Models

In [3]:
from transformers import (
    pipeline,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

In [4]:
class Model:
    """Base wrapper around a ``transformers`` sentiment-analysis pipeline.

    Subclasses set ``MODEL`` to the checkpoint identifier that is handed to
    ``from_pretrained`` for both the classifier and its tokenizer.
    """

    # Checkpoint identifier; left as None here and overridden by subclasses.
    MODEL = None

    def __init__(self):
        """Load the classifier and tokenizer named by ``MODEL`` into a pipeline."""
        checkpoint = self.MODEL
        classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = pipeline(
            task="sentiment-analysis",
            model=classifier,
            tokenizer=tokenizer,
        )

    def predict(self, text):
        """Run the pipeline on ``text`` and return the first element of its output."""
        outputs = self.model(text)
        return outputs[0]


class DistilBERT(Model):
    """Sentiment model backed by the DistilBERT SST-2 checkpoint."""

    MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

    def predict(self, text):
        """Return the base-class prediction for ``text`` without modification."""
        return super(DistilBERT, self).predict(text)


class MultilingualBERT(Model):
    """Multilingual sentiment model whose five categories are folded into two.

    The pipeline is configured to return a score for every category. Labels
    beginning with '1', '2' or '3' are treated as NEGATIVE; the remaining
    ones ('4', '5') as POSITIVE.
    """

    MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"

    # Label prefixes counted as negative; any other label counts as positive.
    _NEGATIVE_PREFIXES = ('1', '2', '3')

    def __init__(self):
        super(MultilingualBERT, self).__init__()
        # This model has 5 categories, and we need every score to reduce them to 2.
        # NOTE(review): newer transformers releases appear to replace
        # `return_all_scores` with `top_k` — confirm against the installed version.
        self.model.return_all_scores = True

    def predict(self, text):
        """Classify ``text`` as NEGATIVE or POSITIVE.

        The label is decided by the single highest-scoring category. The
        returned score is the sum of the scores of every category on the same
        side (negative or positive) as that winner, so it never mixes in
        scores from the opposite polarity.

        Returns:
            dict with keys ``'label'`` ("NEGATIVE" or "POSITIVE") and
            ``'score'`` (aggregated score for that label).
        """
        scores = super(MultilingualBERT, self).predict(text)

        # Highest-scoring category; on ties the earliest one wins, matching a
        # stable descending sort.
        top = max(scores, key=lambda score: score['score'])
        is_negative = top['label'].startswith(self._NEGATIVE_PREFIXES)

        # Copy so the pipeline's own output dict is not mutated.
        sentiment = dict(top)
        sentiment['label'] = "NEGATIVE" if is_negative else "POSITIVE"
        # Aggregate only the categories that share the winner's polarity.
        sentiment['score'] = sum(
            score['score']
            for score in scores
            if score['label'].startswith(self._NEGATIVE_PREFIXES) == is_negative
        )
        return sentiment


class BERT(Model):
    """Sentiment model backed by the textattack BERT IMDB checkpoint."""

    MODEL = "textattack/bert-base-uncased-imdb"

    def predict(self, text):
        """Predict on ``text`` and rename the label to NEGATIVE or POSITIVE.

        "LABEL_0" becomes "NEGATIVE"; any other label becomes "POSITIVE".
        """
        result = super(BERT, self).predict(text)
        is_negative = result['label'] == "LABEL_0"
        result['label'] = "NEGATIVE" if is_negative else "POSITIVE"
        return result


class GermanBERT(Model):
    """Sentiment model backed by the oliverguhr German BERT checkpoint."""

    MODEL = "oliverguhr/german-sentiment-bert"

    def predict(self, text):
        """Predict on ``text`` and upper-case the returned label."""
        result = super(GermanBERT, self).predict(text)
        result.update(label=result['label'].upper())
        return result

In [5]:
# Instantiate every wrapper (this loads each checkpoint), then print each
# model's prediction for the same sample sentence.
distilbert = DistilBERT()
multilingual_bert = MultilingualBERT()
bert = BERT()
german_bert = GermanBERT()

sample_text = "I like you"
for classifier in (distilbert, multilingual_bert, bert, german_bert):
    print(classifier.predict(sample_text))

# Logging Runs

In [6]:
from verta import Client

# NOTE(review): presumably Client() picks up the VERTA_HOST / VERTA_EMAIL /
# VERTA_DEV_KEY environment variables set in the first cell — confirm.
client = Client()

In [7]:
# Create the project that the experiments and runs below are logged under.
client.create_project(
    "Text Classification",
    attrs={'team': "Verta"},
    tags=["NLP", "Classification", "Q4"],
    desc="Models trained for textual sentiment classification.",
)

In [8]:
# DistilBERT gets its own experiment containing a single run.
client.create_experiment("DistilBERT", tags=["Neural Net"])

run = client.create_experiment_run("First DistilBERT", tags=["DistilBERT", "English"])
# Log the model object first, then the packages it needs.
run.log_model(distilbert, custom_modules=[])
run.log_requirements(["torch", "transformers"])

In [9]:
# The three BERT-based models share one experiment, with one run per model.
client.create_experiment("BERT", tags=["Neural Net"])

# (run name, run tags, model object), logged in this order.
bert_runs = [
    ("First BERT", ["BERT", "English"], bert),
    ("Multilingual", ["BERT", "English", "German"], multilingual_bert),
    ("German", ["BERT", "German"], german_bert),
]
for run_name, run_tags, model in bert_runs:
    run = client.create_experiment_run(run_name, tags=run_tags)
    run.log_model(model, custom_modules=[])
    run.log_requirements(["torch", "transformers"])

---