In [1]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
import csv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
file_path = "interpreted_applications.csv"
df = pd.read_csv(file_path)

In [3]:
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [4]:
labels = [
    "Personal and Family names",
    "Business and University Identities",
    "Vehicle and Car Descriptions and Attributes",
    "Diverse Personal Expressions",
    "Area code and Numbers",
    "Gang Color and Controversial Affiliations",
    "Pet and Animal Themes",
    "Colors and Descriptive Traits"
]

In [10]:
output_file_path = "interpreted_applications_with_predictions.csv"
with open(output_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # Write the header row
    writer.writerow(["plate", "majority_vote", "gpt_meaning", "predicted_class"])
    for index, row in tqdm(df.iterrows(), total=len(df), desc="Classifying and writing rows"):
        if pd.notna(row['gpt_meaning']):
            result = classifier(row['gpt_meaning'], labels)
            predicted_class = result['labels'][0]
        else:
            predicted_class = None
        writer.writerow([row['plate'], row['majority_vote'], row['gpt_meaning'], predicted_class])
        file.flush()  # Ensure data is written to disk

Classifying and writing rows: 100%|██████████| 2088/2088 [42:17<00:00,  1.22s/it]


In [None]:
if (index + 1) % 500 == 0:
    print(f"Auto-saving and downloading after {index + 1} rows completed.")
    # Provide a download link or trigger download logic as per the environment

print("Predicted classes have been saved to interpreted_applications_with_predictions.csv")