In [1]:
# Silence every warning for the rest of the session. The filter is installed
# before the imports below, so warnings emitted while importing them are hidden too.
# NOTE(review): a blanket 'ignore' also hides deprecation notices; consider
# narrowing it to specific categories or modules.
import warnings
warnings.filterwarnings('ignore')

# pandas holds the example data; `pipeline` builds the Hugging Face classifier.
import pandas as pd
from transformers import pipeline

In [2]:
import os

# Ask the OpenMP and MKL runtimes to use every available CPU core.
# os.cpu_count() returns None when the core count cannot be determined; fall
# back to a single thread so the variables never hold the literal string "None".
# NOTE(review): these variables are normally read when the native runtime is
# first loaded. `transformers` is imported in an earlier cell, so confirm the
# values still take effect (otherwise run this cell before those imports).
_n_threads = str(os.cpu_count() or 1)
os.environ["OMP_NUM_THREADS"] = _n_threads  # OpenMP thread count (used by PyTorch on CPU)
os.environ["MKL_NUM_THREADS"] = _n_threads  # MKL thread count (NumPy-related operations)

In [3]:
# Build a zero-shot-classification pipeline on the facebook/bart-large-mnli
# checkpoint. device=-1 is the pipeline's CPU setting, so no GPU is used.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=-1,
)

In [4]:
# Tiny hand-labelled example set: Hand_Labels is 1 for a disaster text, 0 otherwise.
df = pd.DataFrame(
    {
        "Text": [
            "A tornado just hit New York.",
            "Donald Trump is running for president.",
        ],
        "Hand_Labels": [1, 0],
    }
)

df

Unnamed: 0,Text,Hand_Labels
0,A tornado just hit New York.,1
1,Donald Trump is running for president.,0


In [12]:
# Predict a label for every text with the zero-shot classification model.
# The candidate labels and their numeric codes live in one mapping so the two
# cannot drift apart (1 = disaster, 0 = not disaster, matching Hand_Labels).
label_to_id = {'disaster': 1, 'not disaster': 0}

output = classifier(
    df.Text.to_list(),
    candidate_labels=list(label_to_id),
)

# Each result's 'labels' list is read at position 0 to get the most probable label.
top_labels = [result['labels'][0] for result in output]

# Translate the text labels into their 0/1 codes.
labels = pd.Series(top_labels).map(label_to_id).to_list()

# Store the predictions next to the hand labels and show the frame.
df['Predicted_Labels'] = labels
df

Unnamed: 0,Text,Hand_Labels,Predicted_Labels
0,A tornado just hit New York.,1,1
1,Donald Trump is running for president.,0,0
