In [1]:
import fiftyone as fo
import pandas as pd
import numpy as np
import os
import json
from ast import literal_eval

In [2]:
# defining relative paths
parent_dir = os.path.dirname(os.path.abspath(os.getcwd()))
dataset_dir = os.path.join(parent_dir, "data")
imgdata_dir = os.path.join(dataset_dir,"img")

In [3]:
classes = ["Not_Checkworthy", "Checkworthy"]

### Functions to Load Data

In [20]:
def create_classification_label(binary_label, confidence=None):
    verbose_label = "Checkworthy" if binary_label else "Not_Checkworthy"
    label = fo.Classification(label=verbose_label, binary_label=binary_label)
    if confidence!= None:
        label.confidence = confidence
        label_prob = confidence
        other_prob = 1-confidence
        label.logits = [other_prob, label_prob] if binary_label else [label_prob, other_prob]
    return label
    

In [22]:
def load_from_df(df, dataset):
    for index, row in df.iterrows():

        file_name = str(row.tweet_id) + ".png"
        file_path = os.path.join(imgdata_dir,file_name)

        metadata = fo.ImageMetadata.build_for(file_path)

        ground_truth = create_classification_label(row.check_worthiness)
        bert_prediction = create_classification_label(row.bert_predictions, row.bert_probability)
        roberta_prediction = create_classification_label(row.roberta_predictions, row.roberta_probability)
        bertweet_prediction = create_classification_label(row.bertweet_predictions, row.bertweet_probability)
        gpt3_prediction = create_classification_label(row.gpt3_predictions, row.gpt3_probability)

        sample = fo.Sample(
            filepath=file_path, 
            metadata=metadata, 
            ground_truth = ground_truth, 
            bert_prediction = bert_prediction,
            roberta_prediction = roberta_prediction,
            bertweet_prediction = bertweet_prediction,
            gpt3_prediction = gpt3_prediction
            )

        sample.tags.append("Validation")
        dataset.add_sample(sample)

### Creating Dataset and Loading Data

In [6]:
dataset_name = "tweet_dataset"

In [34]:
# If you created a dataset with the same name before and create the dataset from scratch, just delete the previous dataset
fo.delete_dataset(dataset_name)

In [35]:
try:
    dataset = fo.Dataset(dataset_name)
except:
    dataset = fo.load_dataset(dataset_name)

In [10]:
eval_df = pd.read_csv(os.path.join(dataset_dir,"eval_df.csv"))
eval_df["embeddings"] = eval_df.embeddings.apply(json.loads)
eval_df["embeddings"] = eval_df["embeddings"].apply(lambda x: np.asanyarray(x))


In [12]:
eval_df.head(3)

Unnamed: 0,tweet_id,tweet_url,tweet_text,check_worthiness,bert_predictions,bert_probability,roberta_predictions,roberta_probability,bertweet_predictions,bertweet_probability,gpt3_predictions,gpt3_probability,embeddings
0,1237160250513522688,https://twitter.com/user/status/12371602505135...,POTUS wanted everyone to know he was in close ...,1,1,0.993853,1,0.983054,1,0.988866,1,0.890893,"[0.16674243, 0.3065092, -0.112421855, 0.048177..."
1,1237125962871037953,https://twitter.com/user/status/12371259628710...,Who would you prefer to lead our nation’s resp...,0,0,0.000108,0,0.006362,0,0.006661,0,0.999922,"[0.22938012, 0.054673575, -0.0858, -0.07526214..."
2,1237207721604235264,https://twitter.com/user/status/12372077216042...,It was a really really really really really re...,0,0,0.000569,0,0.004905,0,0.007607,0,0.959031,"[0.03409792, 0.45846257, -0.015111784, 0.25196..."


In [37]:
load_from_df(eval_df, dataset)

In [38]:
print(dataset)

Name:        tweet_dataset
Media type:  image
Num samples: 140
Persistent:  False
Tags:        []
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bert_prediction:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    roberta_prediction:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bertweet_prediction: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    gpt3_prediction:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)


### GUI of FiftyOne

In [41]:
session = fo.launch_app(dataset, desktop=False)

### Evaluating Predictions of Models

In [21]:
def evaluate_model(model_prefix: str):
    print(f"***\nEvaluation Results For {model_prefix.upper()} model:\n***")
    prediction_label = model_prefix + "_prediction"
    eval_key=f"eval_{model_prefix}_binary"

    results = dataset.evaluate_classifications(
        prediction_label,
        gt_field="ground_truth",
        eval_key=eval_key,
        method="binary",
        classes=classes,
    )

    results.print_report()

    plot = results.plot_pr_curve()
    plot.show()
    
    conf_plot = results.plot_confusion_matrix()
    conf_plot.show()

In [22]:
evaluate_model("bert")

***
Evaluation Results For BERT model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.75      0.70      0.72        80
    Checkworthy       0.63      0.68      0.66        60

       accuracy                           0.69       140
      macro avg       0.69      0.69      0.69       140
   weighted avg       0.70      0.69      0.69       140





FigureWidget({
    'data': [{'customdata': array([0.572932  , 0.6093414 , 0.6666847 , 0.69903913, 0.7764561 , …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

In [23]:
evaluate_model("roberta")

***
Evaluation Results For ROBERTA model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.84      0.78      0.81        80
    Checkworthy       0.73      0.80      0.76        60

       accuracy                           0.79       140
      macro avg       0.78      0.79      0.78       140
   weighted avg       0.79      0.79      0.79       140





FigureWidget({
    'data': [{'customdata': array([0.56247944, 0.5634323 , 0.58569556, 0.59688774, 0.62889504, …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

In [25]:
evaluate_model("bertweet")

***
Evaluation Results For BERTWEET model:
***
                 precision    recall  f1-score   support

Not_Checkworthy       0.78      0.80      0.79        80
    Checkworthy       0.72      0.70      0.71        60

       accuracy                           0.76       140
      macro avg       0.75      0.75      0.75       140
   weighted avg       0.76      0.76      0.76       140





FigureWidget({
    'data': [{'customdata': array([0.55340412, 0.60111934, 0.60657996, 0.63541842, 0.6430512 , …



FigureWidget({
    'data': [{'mode': 'markers',
              'opacity': 0.1,
              'type': 'scatter',…

## FIFTYONE BRAIN

In [16]:
import fiftyone.brain as fob

### Computing Sample hardness

In [39]:
fob.compute_hardness(dataset, "gpt3_prediction")

Computing hardness...
 100% |█████████████████| 140/140 [203.3ms elapsed, 0s remaining, 688.7 samples/s]      
Hardness computation complete


In [40]:
model_prefix_list = ["bert", "roberta", "bertweet", "gpt3"]

for prefix in model_prefix_list:
    fob.compute_mistakenness(dataset, prefix+"_prediction", label_field="ground_truth", mistakenness_field=prefix+"_mistakeness", use_logits=True)

Computing mistakenness...
 100% |█████████████████| 140/140 [220.0ms elapsed, 0s remaining, 636.5 samples/s]      
Mistakenness computation complete
Computing mistakenness...
 100% |█████████████████| 140/140 [223.6ms elapsed, 0s remaining, 626.1 samples/s]      
Mistakenness computation complete
Computing mistakenness...
 100% |█████████████████| 140/140 [241.1ms elapsed, 0s remaining, 580.6 samples/s]      
Mistakenness computation complete
Computing mistakenness...
 100% |█████████████████| 140/140 [245.5ms elapsed, 0s remaining, 570.3 samples/s]      
Mistakenness computation complete


In [16]:
results = fob.compute_visualization(
    dataset,
    embeddings=eval_df.embeddings,
    num_dims=2,
    brain_key="image_embeddings",
    verbose=True,
    seed=51,
)

Generating visualization...
UMAP(random_state=51, verbose=True)
Sun Sep 18 10:26:20 2022 Construct fuzzy simplicial set
Sun Sep 18 10:26:21 2022 Finding Nearest Neighbors
Sun Sep 18 10:26:23 2022 Finished Nearest Neighbor Search
Sun Sep 18 10:26:25 2022 Construct embedding


Epochs completed:   0%|            0/500 [00:00]

Sun Sep 18 10:26:26 2022 Finished embedding


In [18]:
print(type(results))
print(results.points.shape)

<class 'fiftyone.brain.visualization.VisualizationResults'>
(140, 2)


In [21]:
session = fo.launch_app(dataset)

In [25]:
# Plot embeddings colored by ground truth label
plot = results.visualize(labels="ground_truth.label")
plot.show(height=720)

# Attach plot to session
session.plots.attach(plot)



FigureWidget({
    'data': [{'customdata': array(['6326c696494f62ffc8e1bb11', '6326c697494f62ffc8e1bb2c',
    …

In [19]:
print(dataset.list_brain_runs())

['image_embeddings']


In [63]:
# Sort by likelihood of mistake (most likely first)
mistake_view = (dataset
    .sort_by("mistakenness", reverse=True)
)

# Print some information about the view
print(mistake_view)

Dataset:     tweet_dataset
Media type:  image
Num samples: 140
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bert_prediction:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    roberta_prediction:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bertweet_prediction: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    hardness:            fiftyone.core.fields.FloatField
    mistakenness:        fiftyone.core.fields.FloatField
View stages:
    1. SortBy(field_or_expr='mistakenness', reverse=True)


In [64]:
print(mistake_view.head())

[<SampleView: {
    'id': '63248dcc0fb9082b16cd666a',
    'media_type': 'image',
    'filepath': 'c:\\repos\\Transformers_For_Claim_Worthiness\\data\\img\\1237234567423107077.png',
    'tags': BaseList(['Validation']),
    'metadata': <ImageMetadata: {
        'size_bytes': 11520,
        'mime_type': 'image/png',
        'width': 256,
        'height': 256,
        'num_channels': 3,
    }>,
    'ground_truth': <Classification: {
        'id': '63248dcc0fb9082b16cd6666',
        'tags': BaseList([]),
        'label': 'Checkworthy',
        'confidence': None,
        'logits': None,
        'binary_label': 1,
    }>,
    'bert_prediction': <Classification: {
        'id': '63248dcc0fb9082b16cd6667',
        'tags': BaseList([]),
        'label': 'Not_Checkworthy',
        'confidence': 0.000120559474,
        'logits': array([1.20559474e-04, 9.99879441e-01]),
        'binary_label': 0,
    }>,
    'roberta_prediction': <Classification: {
        'id': '63248dcc0fb9082b16cd6668',
     

In [None]:
session.view = dataset.sort_by("mistakenness")

In [67]:
session = fo.launch_app(dataset, desktop=True)

ValueError: Cannot open a Desktop App instance from a IPYTHON notebook

### FINDING CLASSIFICATION MISTAKES WITH FIFTYONE