In [1]:
import pandas as pd
from tqdm import tqdm
import logging
from transformers import pipeline


# 1 Subclassifier

In [17]:

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# Zero-shot classification pipeline backed by the facebook/bart-large-mnli
# checkpoint. Built once here and reused for every activity below.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)


# Candidate hypotheses for the zero-shot subclassifier.
#
# Each mapping goes from the full sentence handed to the classifier as a
# candidate label to the short subcategory name stored in the results.
# Insertion order is the order in which the candidates are passed.

# Subcategories for activities scored as road safety workshops.
road_safety_subcategory_dict = dict([
    (
        "This road safety workshop is targeted at children or schools, "
        "aiming to teach traffic safety and traffic awareness from an early age.",
        'Schools',
    ),
    (
        "This road safety workshop is designed for cyclists or alternative mobility users "
        "to improve their safety awareness and skills on the road.",
        'Cyclists',
    ),
    (
        "This road safety workshop is directed at automobile drivers "
        "to enhance their knowledge of traffic laws and safe driving techniques.",
        'Drivers',
    ),
])

# Subcategories for activities scored as conferences, workshops, or talks.
conference_subcategory_dict = dict([
    (
        "This conference, workshop, or talk is designed for the general public "
        "to discuss mobility issues, raise awareness, and encourage sustainable practices.",
        'General Audience',
    ),
    (
        "This conference, workshop, or talk is specifically targeted at children or schools, "
        "focusing on education and promoting safe and sustainable mobility among younger audiences.",
        'Children/Schools',
    ),
    (
        "This conference, workshop, or talk is aimed at professionals or policymakers in the mobility sector, "
        "discussing regulations, innovations, or infrastructure planning for sustainable mobility.",
        'Professionals/Policymakers',
    ),
])

# Step 1: Load the inputs.
#   - activities_t.csv: per the original note, holds the translated
#     descriptions and the prompt text for each activity.
#   - classification_results.csv: the per-activity scores that the
#     subclassification loop thresholds on.
prompt_df = pd.read_csv('../data/activities_t.csv')
class_df = pd.read_csv('../data/classification_results.csv')

# Step 2: Attach each activity's prompt to its classification row, keyed on ID.
# IDs that cannot be parsed as numbers become NaN and therefore get no prompt.
class_df['ID'] = pd.to_numeric(class_df['ID'], errors='coerce')

# Build the ID -> prompt lookup without mutating prompt_df:
#   * coerce the lookup IDs the same way, so both sides compare as numbers;
#   * drop rows whose ID is NaN, so unparseable IDs never match each other;
#   * keep only the first prompt per ID, because Series.map raises when the
#     mapping Series has a non-unique index.
prompt_ids = pd.to_numeric(prompt_df['ID'], errors='coerce')
duplicate_id_count = int(prompt_ids.dropna().duplicated().sum())
if duplicate_id_count:
    logging.warning(
        "%d duplicate ID row(s) in activities_t.csv; keeping the first prompt for each ID",
        duplicate_id_count,
    )
prompt_lookup = (
    prompt_df.assign(ID=prompt_ids)
    .dropna(subset=['ID'])
    .drop_duplicates(subset='ID', keep='first')
    .set_index('ID')['prompt']
)
class_df['prompt'] = class_df['ID'].map(prompt_lookup)

# Surface rows that ended up without a prompt instead of failing later.
missing_prompt_count = int(class_df['prompt'].isna().sum())
if missing_prompt_count:
    logging.warning(
        "%d of %d classification row(s) have no matching prompt",
        missing_prompt_count,
        len(class_df),
    )

# Minimum primary-classification score an activity needs in a category before
# it is subclassified within that category.
ROAD_SAFETY_THRESHOLD = 0.15
CONFERENCE_THRESHOLD = 0.35


def _subclassify(activity_id, prompt, subcategory_dict):
    """Run the zero-shot subclassifier on one prompt and build its result record.

    Args:
        activity_id: Value stored under "ID" in the returned record.
        prompt: Text passed to the classifier.
        subcategory_dict: Mapping from candidate hypothesis sentence to the
            short subcategory name; its keys are used as candidate labels.

    Returns:
        A dict with the keys "ID", "secondary_result" (the raw classifier
        output), "secondary_classification" (short name of the first label in
        the output, treated as the top one) and "secondary_score" (its score).
    """
    subclass_result = classifier(prompt, candidate_labels=list(subcategory_dict.keys()))
    return {
        "ID": activity_id,
        "secondary_result": subclass_result,
        "secondary_classification": subcategory_dict[subclass_result['labels'][0]],
        "secondary_score": subclass_result['scores'][0],
    }


road_safety_res = []
conference_res = []

# Secondary classification: for every activity whose primary score reaches a
# category threshold, pick the best-matching subcategory within that category.
# An activity can qualify for both categories and then appears in both lists.
for _, activity in tqdm(class_df.iterrows(), total=len(class_df), desc="Classifying activities"):
    wants_road_safety = activity['Road Safety Workshop'] >= ROAD_SAFETY_THRESHOLD
    wants_conference = activity['Conference'] >= CONFERENCE_THRESHOLD
    if not (wants_road_safety or wants_conference):
        continue

    # A missing prompt (NaN after the ID join) or a blank one cannot be
    # classified; skip it with a warning rather than aborting the whole run.
    prompt = activity['prompt']
    if not isinstance(prompt, str) or not prompt.strip():
        logging.warning("Skipping activity %s: no prompt text available", activity['ID'])
        continue

    if wants_road_safety:
        # Subclassification for Road Safety Workshops
        road_safety_res.append(
            _subclassify(activity['ID'], prompt, road_safety_subcategory_dict)
        )
    if wants_conference:
        # Subclassification for Conferences
        conference_res.append(
            _subclassify(activity['ID'], prompt, conference_subcategory_dict)
        )
        

# Step 3: Save the subclassification results, one CSV per primary category.
# The columns are passed explicitly so that an empty result list still yields
# a CSV with a header row instead of a frame (and file) with no columns.
RESULT_COLUMNS = ["ID", "secondary_result", "secondary_classification", "secondary_score"]

road_safety_df = pd.DataFrame(road_safety_res, columns=RESULT_COLUMNS)
conference_df = pd.DataFrame(conference_res, columns=RESULT_COLUMNS)

road_safety_df.to_csv('../data/road_safety_subclassification.csv', index=False)
conference_df.to_csv('../data/conference_subclassification.csv', index=False)


Classifying activities: 100%|██████████| 1039/1039 [14:50<00:00,  1.17it/s]
