In [1]:
import fiftyone as fo
import pandas as pd
import os

In [2]:
# defining relative paths
parent_dir = os.path.dirname(os.path.abspath(os.getcwd()))
dataset_dir = os.path.join(parent_dir, "data")
imgdata_dir = os.path.join(dataset_dir,"img")

In [3]:
classes = ["Not_Checkworthy", "Checkworthy"]

### Functions to Load Data

In [4]:
def create_classification_label(binary_label, confidence=None):
    verbose_label = "Checkworthy" if binary_label else "Not_Checkworthy"
    label = fo.Classification(label=verbose_label, binary_label=binary_label)
    if confidence:
        label.confidence = confidence
        label_prob = confidence
        other_prob = 1-confidence
        label.logits = [other_prob, label_prob] if binary_label else [label_prob, other_prob]
    return label
    

In [5]:
def load_from_df(df, dataset):
    for index, row in df.iterrows():

        file_name = str(row.tweet_id) + ".png"
        file_path = os.path.join(imgdata_dir,file_name)

        metadata = fo.ImageMetadata.build_for(file_path)

        ground_truth = create_classification_label(row.check_worthiness)
        bert_prediction = create_classification_label(row.bert_predictions, row.bert_probability)
        roberta_prediction = create_classification_label(row.roberta_predictions, row.roberta_probability)
        bertweet_prediction = create_classification_label(row.bertweet_predictions, row.bertweet_probability)

        sample = fo.Sample(
            filepath=file_path, 
            metadata=metadata, 
            ground_truth = ground_truth, 
            bert_prediction = bert_prediction,
            roberta_prediction = roberta_prediction,
            bertweet_prediction = bertweet_prediction
            )

        sample.tags.append("Validation")
        dataset.add_sample(sample)

### Creating Dataset and Loading Data

In [6]:
dataset_name = "tweet_dataset"

In [7]:
# If you created a dataset with the same name before and create the dataset from scratch, just delete the previous dataset
# fo.delete_dataset(dataset_name)

In [8]:
try:
    dataset = fo.Dataset(dataset_name)
except:
    dataset = fo.load_dataset(dataset_name)

In [9]:
eval_df = pd.read_csv(os.path.join(dataset_dir,"eval_df.csv"))

In [52]:
load_from_df(eval_df, dataset)

In [53]:
print(dataset)

Name:        tweet_dataset
Media type:  image
Num samples: 140
Persistent:  False
Tags:        []
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bert_prediction:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    roberta_prediction:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bertweet_prediction: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)


### GUI of FiftyOne

In [9]:
session = fo.launch_app(dataset)

### Evaluating Predictions of Models

In [21]:
def evaluate_model(model_prefix: str):
    print(f"***\nEvaluation Results For {model_prefix.upper()} model:\n***")
    prediction_label = model_prefix + "_prediction"
    eval_key=f"eval_{model_prefix}_binary"

    results = dataset.evaluate_classifications(
        prediction_label,
        gt_field="ground_truth",
        eval_key=eval_key,
        method="binary",
        classes=classes,
    )

    results.print_report()

    plot = results.plot_pr_curve()
    plot.show()
    
    conf_plot = results.plot_confusion_matrix()
    conf_plot.show()

In [22]:
evaluate_model("bert")

***
Evaluation Results For BERT model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.75      0.70      0.72        80
    Checkworthy       0.63      0.68      0.66        60

       accuracy                           0.69       140
      macro avg       0.69      0.69      0.69       140
   weighted avg       0.70      0.69      0.69       140





FigureWidget({
    'data': [{'customdata': array([0.572932  , 0.6093414 , 0.6666847 , 0.69903913, 0.7764561 , …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

In [23]:
evaluate_model("roberta")

***
Evaluation Results For ROBERTA model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.84      0.78      0.81        80
    Checkworthy       0.73      0.80      0.76        60

       accuracy                           0.79       140
      macro avg       0.78      0.79      0.78       140
   weighted avg       0.79      0.79      0.79       140





FigureWidget({
    'data': [{'customdata': array([0.56247944, 0.5634323 , 0.58569556, 0.59688774, 0.62889504, …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

In [25]:
evaluate_model("bertweet")

***
Evaluation Results For BERTWEET model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.78      0.80      0.79        80
    Checkworthy       0.72      0.70      0.71        60

       accuracy                           0.76       140
      macro avg       0.75      0.75      0.75       140
   weighted avg       0.76      0.76      0.76       140





FigureWidget({
    'data': [{'customdata': array([0.55340412, 0.60111934, 0.60657996, 0.63541842, 0.6430512 , …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

In [37]:
results = dataset.evaluate_classifications(
    "bertweet_prediction",
    gt_field="ground_truth",
    method = "binary",
    eval_key="evalu",
    classes=classes
)

In [38]:
results

<fiftyone.utils.eval.classification.BinaryClassificationResults at 0x2b0b14eb910>

In [42]:
plot = results.plot_confusion_matrix()
plot.show()



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

## FIFTYONE BRAIN

In [54]:
import fiftyone.brain as fob

### Computing Sample hardness

In [55]:
fob.compute_hardness(dataset, "bertweet_prediction")

Computing hardness...
 100% |█████████████████| 140/140 [323.5ms elapsed, 0s remaining, 432.8 samples/s]      
Hardness computation complete


In [57]:
fob.compute_mistakenness(dataset, "bertweet_prediction", label_field="ground_truth", use_logits=True)

Computing mistakenness...
 100% |█████████████████| 140/140 [301.3ms elapsed, 0s remaining, 464.7 samples/s]      
Mistakenness computation complete


In [10]:
print(dataset.list_brain_runs())

['hardness', 'mistakenness']


In [63]:
# Sort by likelihood of mistake (most likely first)
mistake_view = (dataset
    .sort_by("mistakenness", reverse=True)
)

# Print some information about the view
print(mistake_view)

Dataset:     tweet_dataset
Media type:  image
Num samples: 140
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bert_prediction:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    roberta_prediction:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bertweet_prediction: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    hardness:            fiftyone.core.fields.FloatField
    mistakenness:        fiftyone.core.fields.FloatField
View stages:
    1. SortBy(field_or_expr='mistakenness', reverse=True)


In [64]:
print(mistake_view.head())

[<SampleView: {
    'id': '63248dcc0fb9082b16cd666a',
    'media_type': 'image',
    'filepath': 'c:\\repos\\Transformers_For_Claim_Worthiness\\data\\img\\1237234567423107077.png',
    'tags': BaseList(['Validation']),
    'metadata': <ImageMetadata: {
        'size_bytes': 11520,
        'mime_type': 'image/png',
        'width': 256,
        'height': 256,
        'num_channels': 3,
    }>,
    'ground_truth': <Classification: {
        'id': '63248dcc0fb9082b16cd6666',
        'tags': BaseList([]),
        'label': 'Checkworthy',
        'confidence': None,
        'logits': None,
        'binary_label': 1,
    }>,
    'bert_prediction': <Classification: {
        'id': '63248dcc0fb9082b16cd6667',
        'tags': BaseList([]),
        'label': 'Not_Checkworthy',
        'confidence': 0.000120559474,
        'logits': array([1.20559474e-04, 9.99879441e-01]),
        'binary_label': 0,
    }>,
    'roberta_prediction': <Classification: {
        'id': '63248dcc0fb9082b16cd6668',
     

In [None]:
session.view = dataset.sort_by("mistakenness")

In [67]:
session = fo.launch_app(dataset, desktop=True)

ValueError: Cannot open a Desktop App instance from a IPYTHON notebook

### FINDING CLASSIFICATION MISTAKES WITH FIFTYONE