In [1]:
pip install gradio

Collecting gradio
  Downloading gradio-5.13.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.6.0 (from gradio)
  Downloading gradio_client-1.6.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.9.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [2]:
import gradio as gr
import pandas as pd
import uuid

# Destination CSV that collects every submitted annotation
# (a relative path, so it resolves against the current working directory).
csv_file = "annotations.csv"

# Languages offered as checkboxes: display name -> language code.
# Insertion order is the order in which the choices are listed in the form.
languages = {
    "English": "en", "French": "fr", "Italian": "it", "Marathi": "mr",
    "Portuguese (Brazil)": "pt-BR", "German": "de", "Romanian": "ro",
    "Russian": "ru", "Dutch": "nl", "Chinese": "zh", "Arabic": "ar",
}

# Regions offered as checkboxes: display name -> three-letter region code.
regions = {
    "United States": "USA", "France": "FRA", "Netherlands": "NLD",
    "India": "IND", "China": "CHN", "Hong Kong": "HKG",
    "Brazil": "BRA", "Italy": "ITA", "Germany": "DEU", "Romania": "ROU",
    "Algeria": "DZA", "Egypt": "EGY", "Libya": "LBY",
    "Morocco": "MAR", "Sudan": "SDN", "Tunisia": "TUN",
}

# Helpers and callback that persist one submitted annotation to the CSV file
def _merge_selections(selected, extra_text):
    """Combine checkbox selections with comma-separated free-text entries.

    Args:
        selected: Values ticked in a checkbox group; ``None`` is treated as
            an empty selection.
        extra_text: Comma-separated extra entries typed by the annotator;
            ``None`` or a blank string contributes nothing.

    Returns:
        A new list holding the selected values followed by every non-empty,
        whitespace-stripped entry of ``extra_text``.
    """
    merged = list(selected or [])
    if extra_text:
        # "Spanish:es, Japanese:ja" must not yield " Japanese:ja" or "" items.
        merged.extend(part.strip() for part in extra_text.split(",") if part.strip())
    return merged


def save_annotation(
    bias_type, stereotype_origin_langs, additional_origin_langs,
    stereotype_valid_langs, additional_valid_langs,
    stereotype_valid_regions, additional_regions, stereotyped_entity, type_,
    en_biased_sentences, en_expression, en_comments, created_by, creator_attributes_optional
):
    """Append one annotation as a new row of the CSV file named by ``csv_file``.

    Each ``stereotype_*`` checkbox selection is merged with its matching
    ``additional_*`` comma-separated text field; every other argument is
    stored unchanged under the column of the same name (``type_`` is stored
    as ``type``). The row also gets a fresh UUID4 string in ``index`` and the
    constant ``"_gradio"`` in ``subset``.

    A missing or completely empty CSV file is treated as "no rows yet" and is
    created with this annotation as its only row.

    Returns:
        The status message ``"Annotation saved successfully!"``.
    """
    # Combine checkbox and textbox inputs
    combined_origin_langs = _merge_selections(stereotype_origin_langs, additional_origin_langs)
    combined_valid_langs = _merge_selections(stereotype_valid_langs, additional_valid_langs)
    combined_valid_regions = _merge_selections(stereotype_valid_regions, additional_regions)

    # Prepare the data row
    data = {
        "index": str(uuid.uuid4()),
        "subset": "_gradio",
        "bias_type": bias_type,
        "stereotype_origin_langs": combined_origin_langs,
        "stereotype_valid_langs": combined_valid_langs,
        "stereotype_valid_regions": combined_valid_regions,
        "stereotyped_entity": stereotyped_entity,
        "type": type_,
        "en_biased_sentences": en_biased_sentences,
        "en_expression": en_expression,
        "en_comments": en_comments,
        "created_by": created_by,
        "creator_attributes_optional": creator_attributes_optional,
    }
    # Wrapping the dict in a list makes it a single row and keeps list values
    # as single cells instead of expanding them into several rows.
    new_row = pd.DataFrame([data])

    # Save to CSV
    try:
        existing = pd.read_csv(csv_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        updated = new_row
    else:
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        updated = pd.concat([existing, new_row], ignore_index=True)

    updated.to_csv(csv_file, index=False)

    return "Annotation saved successfully!"

# Build the annotation form, wire the submit button to save_annotation,
# and start the app.
def _choice_labels(mapping):
    """Return one "Name (code)" checkbox label per entry of ``mapping``."""
    return [f"{name} ({code})" for name, code in mapping.items()]


with gr.Blocks() as demo:
    gr.Markdown("### Annotation Form")

    bias_type = gr.CheckboxGroup(
        label="Bias Type",
        choices=["gender", "race", "age", "other"],
    )

    # Each row pairs a predefined checkbox list with a free-text field for
    # entries that are not in the list.
    with gr.Row():
        stereotype_origin_langs = gr.CheckboxGroup(
            label="Stereotype Origin Languages",
            choices=_choice_labels(languages),
        )
        additional_origin_langs = gr.Textbox(
            label="Additional Origin Languages (CSV format, e.g., Spanish:es, Japanese:ja)",
        )

    with gr.Row():
        stereotype_valid_langs = gr.CheckboxGroup(
            label="Stereotype Valid Languages",
            choices=_choice_labels(languages),
        )
        additional_valid_langs = gr.Textbox(
            label="Additional Valid Languages (CSV format, e.g., Spanish:es, Japanese:ja)",
        )

    with gr.Row():
        stereotype_valid_regions = gr.CheckboxGroup(
            label="Stereotype Valid Regions",
            choices=_choice_labels(regions),
        )
        additional_regions = gr.Textbox(
            label="Additional Regions (CSV format, e.g., Mexico:MEX, Canada:CAN)",
        )

    stereotyped_entity = gr.Textbox(label="Stereotyped Entity")
    type_ = gr.Dropdown(
        label="Type",
        choices=["declaration", "question", "statement"],
    )
    en_biased_sentences = gr.Textbox(label="English Biased Sentences", lines=3)
    en_expression = gr.Checkbox(label="Expression Validity")
    en_comments = gr.Textbox(label="English Comments (optional)", lines=2)

    with gr.Row():
        created_by = gr.Textbox(label="Created By")
        creator_attributes_optional = gr.Textbox(label="Creator Attributes (JSON format)")

    submit = gr.Button("Submit Annotation")
    # Created after the button so the status box is placed below it.
    status_box = gr.Textbox(label="Status")

    # The order here must match save_annotation's positional parameters.
    form_inputs = [
        bias_type,
        stereotype_origin_langs,
        additional_origin_langs,
        stereotype_valid_langs,
        additional_valid_langs,
        stereotype_valid_regions,
        additional_regions,
        stereotyped_entity,
        type_,
        en_biased_sentences,
        en_expression,
        en_comments,
        created_by,
        creator_attributes_optional,
    ]
    submit.click(save_annotation, inputs=form_inputs, outputs=[status_box])

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d7f825d58ece6ec1c5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1