In [1]:
from datasets import load_dataset

# Load the "es" configuration of the amazon_reviews_multi dataset.
dataset = load_dataset(path="amazon_reviews_multi", name="es")

Reusing dataset amazon_reviews_multi (/Users/ignaciotalaveracepeda/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/f3357bd271e187385a38574fe31b8fb10055303f67fa9fce55e84d08c4870efd)


In [2]:
# Show the first validation example to see which fields each review carries.
dataset["validation"][0]

{'language': 'es',
 'product_category': 'wireless',
 'product_id': 'product_es_0873923',
 'review_body': 'Malisimo, muy grande demasiado aparatoso y mal protector de pantalla',
 'review_id': 'es_0417480',
 'review_title': 'Mala compra',
 'reviewer_id': 'reviewer_es_0672978',
 'stars': 1}

# Client setup

In [3]:
import rubrix

from rubrix.sdk.models import * 

In [5]:
import os

# Read the API token from the environment so that no credential is stored in
# the notebook itself. Any token that was previously committed inline should
# be treated as leaked and revoked.
API_KEY = os.environ.get("RUBRIX_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "RUBRIX_API_KEY is not set; export your Rubrix API token before running this cell."
    )

# Initialise the rubrix client with the token.
rubrix.init(token=API_KEY)

# Store labeled dataset for initial exploration


Here we will store the validation dataset with labels, inputs and associated metadata.


In [6]:
def create_record(idx, inputs, label, metadata):
    """Build a ``TextClassificationRecord`` that carries a single annotated label.

    Args:
        idx: Value stored under the ``"idx"`` key of the record payload.
        inputs: Value stored under ``"inputs"`` (the notebook passes a dict of
            review fields).
        label: Class value of the one annotated label.
        metadata: Value stored under ``"metadata"``.

    Returns:
        Whatever ``TextClassificationRecord.from_dict`` builds from the payload.
    """
    # NOTE(review): the records displayed later in the notebook show "idx"
    # ending up in additional_properties while `id` stays unset — confirm that
    # "idx" is the intended key.
    annotation = {"agent": "test", "labels": [{"class": label}]}
    payload = {
        "idx": idx,
        "inputs": inputs,
        "annotation": annotation,
        "metadata": metadata,
    }
    return TextClassificationRecord.from_dict(payload)

In [7]:
# One annotated record per validation review: title and body are the inputs,
# the star rating is the label, category and reviewer go into the metadata.
records = [
    create_record(
        idx=review["product_id"],
        inputs={
            "review_body": review["review_body"],
            "review_title": review["review_title"],
        },
        metadata={
            "product_category": review["product_category"],
            "reviewer_id": review["reviewer_id"],
        },
        label=review["stars"],
    )
    for review in dataset["validation"]
]

In [8]:
# Preview the first five records built above.
records[:5]

[TextClassificationRecord(inputs=TextClassificationRecordInputs(additional_properties={'review_body': 'Malisimo, muy grande demasiado aparatoso y mal protector de pantalla', 'review_title': 'Mala compra'}), id=<rubrix.sdk.types.Unset object at 0x7f990e83b290>, metadata=TextClassificationRecordMetadata(additional_properties={'product_category': 'wireless', 'reviewer_id': 'reviewer_es_0672978'}), status=None, prediction=<rubrix.sdk.types.Unset object at 0x7f990e83b290>, annotation=TextClassificationAnnotation(agent='test', labels=[ClassPrediction(class_=1, confidence=<rubrix.sdk.types.Unset object at 0x7f990e83b290>, additional_properties={})], additional_properties={}), event_timestamp=None, multi_label=<rubrix.sdk.types.Unset object at 0x7f990e83b290>, explanation=<rubrix.sdk.types.Unset object at 0x7f990e83b290>, additional_properties={'idx': 'product_es_0873923'}),
 TextClassificationRecord(inputs=TextClassificationRecordInputs(additional_properties={'review_body': 'No he recibido el

In [9]:
# Send the annotated records to the dataset named below, 100 records per chunk,
# and attach descriptive tags.
rubrix.log(
    records=records,
    name="amazon_sentiment_es_validation_ds_multifield",
    chunk_size=100,
    tags=dict(
        type="sentiment classifier",
        lang="spanish",
        description="Spanish sentiment classifier with `multifield inputs` (title and body)",
    ),
)

BulkResponse(dataset='amazon_sentiment_es_validation_ds_multifield', processed=5000, failed=0, additional_properties={})

# Store predictions and labels with a pretrained model for error analysis

Here we will use a pretrained transformer from the Hugging Face Hub and analyse its quality on an unseen dataset.

In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Hub checkpoint used for both the tokenizer and the classification model.
CHKPT = 'mrm8488/electricidad-small-finetuned-muchocine'
tokenizer = AutoTokenizer.from_pretrained(CHKPT)
model = AutoModelForSequenceClassification.from_pretrained(CHKPT)

# return_all_scores=True asks for a score per label instead of only the best one.
classifier = pipeline(
    task='sentiment-analysis',
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
)

In [11]:
def get_stars(label):
    """Return how many elements of ``label`` are not a single space character.

    The notebook uses this to turn a model label into a star count
    (presumably the labels are one glyph per star separated by spaces —
    TODO confirm against the checkpoint's label names).
    """
    return sum(1 for char in label if char != ' ')

In [12]:
# Try the classifier on one sentence and convert every scored label into a
# {"class", "confidence"} entry.
[
    {"class": get_stars(scored['label']), "confidence": scored['score']}
    for scored in classifier("Una buena película, sin más.")[0]
]

[{'class': 1, 'confidence': 0.0005326213431544602},
 {'class': 2, 'confidence': 0.0003107075172010809},
 {'class': 3, 'confidence': 0.006889391224831343},
 {'class': 4, 'confidence': 0.9659245014190674},
 {'class': 5, 'confidence': 0.02634277381002903}]

In [13]:
def create_record_with_preds(idx, inputs, label, metadata, prediction):
    """Build a ``TextClassificationRecord`` with both an annotation and a prediction.

    Args:
        idx: Value stored under the ``"idx"`` key of the record payload.
        inputs: Value stored under ``"inputs"`` (the notebook passes a dict of
            review fields).
        label: Class value of the one annotated label.
        metadata: Value stored under ``"metadata"``.
        prediction: Value stored under ``"prediction"`` (the notebook passes a
            dict with ``"agent"`` and ``"labels"``).

    Returns:
        Whatever ``TextClassificationRecord.from_dict`` builds from the payload.
    """
    annotation = {"agent": "test", "labels": [{"class": label}]}
    payload = {
        "idx": idx,
        "inputs": inputs,
        "annotation": annotation,
        "prediction": prediction,
        "metadata": metadata,
    }
    return TextClassificationRecord.from_dict(payload)

In [14]:
# One record per validation review, carrying the gold star rating plus the
# classifier's scores for the review body (the title is not fed to the model).
records = [
    create_record_with_preds(
        idx=review["product_id"],
        inputs={
            "review_body": review["review_body"],
            "review_title": review["review_title"],
        },
        metadata={
            "product_category": review["product_category"],
            "reviewer_id": review["reviewer_id"],
        },
        label=review["stars"],
        prediction={
            "agent": CHKPT,
            "labels": [
                {"class": get_stars(scored['label']), "confidence": scored['score']}
                for scored in classifier(review["review_body"])[0]
            ],
        },
    )
    for review in dataset["validation"]
]

In [15]:
# Sanity check: number of records built in the previous cell.
len(records)

5000

In [16]:
# Send the records (annotations plus predictions) to the dataset named below,
# 200 records per chunk, tagged with the model checkpoint that produced them.
rubrix.log(
    records=records,
    name="amazon_sentiment_es_validation_ds_multifield",
    chunk_size=200,
    tags=dict(
        env="test",
        model=CHKPT,
        type="evaluation dataset",
    ),
)

BulkResponse(dataset='amazon_sentiment_es_validation_ds_multifield', processed=5000, failed=0, additional_properties={})

# Store only predictions for model monitoring

Here we will use a pretrained transformer from the Hugging Face Hub and monitor the model's predictions over time.

In [17]:
def create_record_only_preds(idx, inputs, metadata, prediction):
    """Build a ``TextClassificationRecord`` that has a prediction but no annotation.

    Args:
        idx: Value stored under the ``"idx"`` key of the record payload.
        inputs: Value stored under ``"inputs"`` (the notebook passes a dict of
            review fields).
        metadata: Value stored under ``"metadata"``.
        prediction: Value stored under ``"prediction"`` (the notebook passes a
            dict with ``"agent"`` and ``"labels"``).

    Returns:
        Whatever ``TextClassificationRecord.from_dict`` builds from the payload.
    """
    payload = {
        "idx": idx,
        "inputs": inputs,
        "prediction": prediction,
        "metadata": metadata,
    }
    return TextClassificationRecord.from_dict(payload)

In [18]:
# One record per validation review with the classifier's scores for the review
# body and no gold label attached.
records = [
    create_record_only_preds(
        idx=review["product_id"],
        inputs={
            "review_body": review["review_body"],
            "review_title": review["review_title"],
        },
        metadata={
            "product_category": review["product_category"],
            "reviewer_id": review["reviewer_id"],
        },
        prediction={
            "agent": CHKPT,
            "labels": [
                {"class": get_stars(scored['label']), "confidence": scored['score']}
                for scored in classifier(review["review_body"])[0]
            ],
        },
    )
    for review in dataset["validation"]
]

In [19]:
# Send the prediction-only records, 200 records per chunk.
# NOTE(review): this uses the same dataset name and the same
# "evaluation dataset" tag as the upload of annotated predictions earlier in
# the notebook — confirm that is intended for the monitoring scenario.
rubrix.log(
    records=records,
    name="amazon_sentiment_es_validation_ds_multifield",
    chunk_size=200,
    tags=dict(
        env="test",
        model=CHKPT,
        type="evaluation dataset",
    ),
)

BulkResponse(dataset='amazon_sentiment_es_validation_ds_multifield', processed=5000, failed=0, additional_properties={})