# Golden Dataset Label Expansion and Cleaning

This notebook expands human annotations into analysis-ready labels and prepares
the Golden Dataset for governance-aligned evaluation.

The focus is on explicitly encoding ambiguity rather than collapsing disagreement
into a single ground-truth label.


In [None]:
# Mount Google Drive at /content/drive; the later cells read and write
# files under that path.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install --quiet gspread gspread-dataframe oauth2client

import pandas as pd
import numpy as np

import gspread
from google.auth.transport.requests import Request
from google.auth import default

from gspread_dataframe import set_with_dataframe

print("Libraries imported successfully.")


Libraries imported successfully.


## Annotation Inputs

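This notebook assumes access to a manually annotated Golden subset derived from
the Civil Comments dataset.

Annotation files are not included in this repository and must be supplied by the
user. The loading cell below reads the workbook from the mounted Google Drive
folder, so place the file there or adjust the path accordingly.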


In [None]:
# Load the 'golden_dataset' sheet of the pre-cleaning workbook from Drive.
df = pd.read_excel(
    io='/content/drive/MyDrive/Dat490/Dataset/golden_dataset_preclean.xlsx',
    sheet_name='golden_dataset',
)

# Preview the top of the frame
df.head()


Unnamed: 0,article_id,created_date,publication_id,original_dominant_label,original_max_label_score,risk_tier,stratum_key,parent_id,id,parent_text,...,first_label,ambiguity,comments,first_rater_name,second_rater_agrees,second_rater_name,second_rater_notes,final_notes,final_label,final_ambiguity
0,381463,2017-09-24 23:13:39.809705+00:00,102,toxicity,0.787879,High-Risk,High-Risk_toxicity,6013735,6015432,Glad to see the Broncos and players all over t...,...,"insult, toxicity",clear_violation,,mamoun,Yes,Destiny,,done,"insult, toxicity",clear_violation
1,358797,2017-07-24 21:42:10.511219+00:00,102,toxicity,0.5,High-Risk,High-Risk_toxicity,5650746,5650987,You can say what you want about Obama but at l...,...,"insult, toxicity",clear_violation,,mamoun,Yes,Destiny,,done,"insult, toxicity",clear_violation
2,148987,2016-10-20 00:14:30.122014+00:00,21,toxicity,1.0,High-Risk,High-Risk_toxicity,0,7080512,,...,"insult, toxicity",clear_violation,,mamoun,Yes,Destiny,,done,"insult, toxicity",clear_violation
3,387828,2017-10-11 17:15:02.701921+00:00,54,toxicity,0.785714,High-Risk,High-Risk_toxicity,0,7115350,,...,"toxicity, insult",clear_violation,,mamoun,Yes,Destiny,,done,"toxicity, insult",clear_violation
4,165436,2017-02-01 21:28:42.239758+00:00,21,toxicity,0.4,Borderline,Borderline_toxicity,935034,935199,It's about faith\nhttps://youtu.be/viDffWUjcBA,...,"toxicity, identity_attack",clear_violation,,mamoun,Yes,Destiny,,done,"toxicity, identity_attack",clear_violation


In [None]:
# Look at the raw annotation columns before any parsing.

cols_to_check = ['first_label', 'final_label', 'ambiguity', 'final_ambiguity']

# First ten rows of the four columns, side by side
print("Sample rows:")
display(df[cols_to_check].head(10))

# Ten most frequent raw strings in each of the two label columns
for label_col in cols_to_check[:2]:
    print(f"\nUnique patterns in {label_col}:")
    print(df[label_col].value_counts().head(10))


Sample rows:


Unnamed: 0,first_label,final_label,ambiguity,final_ambiguity
0,"insult, toxicity","insult, toxicity",clear_violation,clear_violation
1,"insult, toxicity","insult, toxicity",clear_violation,clear_violation
2,"insult, toxicity","insult, toxicity",clear_violation,clear_violation
3,"toxicity, insult","toxicity, insult",clear_violation,clear_violation
4,"toxicity, identity_attack","toxicity, identity_attack",clear_violation,clear_violation
5,"none, toxicity","none, toxicity",gray_area,gray_area
6,"toxicity, insult","toxicity, insult",clear_violation,clear_violation
7,"toxicity, insult","toxicity, insult",clear_violation,clear_violation
8,none,none,no_violation,no_violation
9,"toxicity, insult","toxicity, insult",clear_violation,clear_violation



Unique patterns in first_label:
first_label
toxicity, insult                     92
none                                 83
insult                               18
toxicity                             18
toxicity, insult, identity_attack    13
insult, toxicity                     12
toxicity, insult, obscene             8
toxicity, identity_attack             8
identity_attack, toxicity             4
identity_attack                       4
Name: count, dtype: int64

Unique patterns in final_label:
final_label
toxicity, insult                     94
none                                 83
toxicity                             16
insult                               14
insult, toxicity                     14
toxicity, insult, identity_attack    12
toxicity, identity_attack             8
toxicity, insult, obscene             6
identity_attack, toxicity             5
identity_attack                       3
Name: count, dtype: int64


In [None]:
# ==========================================
# Convert multi-select label strings into lists
# ==========================================

def to_label_list(x):
    """Parse one multi-select annotation cell into a list of label names.

    Parameters
    ----------
    x : str, list, tuple, None or float NaN
        A comma-separated label string such as ``"insult, toxicity"``.
        Blank spreadsheet cells (``None`` / ``NaN``) are treated as "no
        labels". A list or tuple is re-normalized, so re-running the cell
        that applies this function does not fail on already-parsed values.

    Returns
    -------
    list of str
        Lower-cased, whitespace-stripped labels in their original order.
        Returns ``[]`` when the cell is blank or consists solely of
        ``"none"``. A ``"none"`` that appears next to other labels
        (e.g. ``"none, toxicity"``) is kept as written.

    Raises
    ------
    TypeError
        If ``x`` is neither text, a list/tuple, nor a missing value.
    """
    if isinstance(x, (list, tuple)):
        raw_tokens = [str(item) for item in x]
    elif isinstance(x, str):
        raw_tokens = x.split(',')
    elif x is None or (isinstance(x, float) and x != x):
        # NaN is the only float that is not equal to itself.
        return []
    else:
        raise TypeError(
            f"to_label_list expected a label string, got {type(x).__name__}"
        )

    labels = [token.strip().lower() for token in raw_tokens]
    # Trailing or doubled commas would otherwise leave '' entries behind.
    labels = [token for token in labels if token]

    if labels == ['none']:
        return []
    return labels

# Run both label columns through the same parser.
for label_col in ('first_label', 'final_label'):
    df[label_col] = df[label_col].apply(to_label_list)

df[['first_label', 'final_label']].head(10)


Unnamed: 0,first_label,final_label
0,"[insult, toxicity]","[insult, toxicity]"
1,"[insult, toxicity]","[insult, toxicity]"
2,"[insult, toxicity]","[insult, toxicity]"
3,"[toxicity, insult]","[toxicity, insult]"
4,"[toxicity, identity_attack]","[toxicity, identity_attack]"
5,"[none, toxicity]","[none, toxicity]"
6,"[toxicity, insult]","[toxicity, insult]"
7,"[toxicity, insult]","[toxicity, insult]"
8,[],[]
9,"[toxicity, insult]","[toxicity, insult]"


In [None]:
# Normalize ambiguity text columns: trim, lower-case, spaces -> underscores

ambiguity_cols = ['ambiguity', 'final_ambiguity']

for col in ambiguity_cols:
    normalized = (
        df[col]
        .astype(str)
        .str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
    )
    # astype(str) turns missing cells into the literal text "nan"; restore the
    # missing marker so blanks are not mistaken for a real category.
    df[col] = normalized.where(df[col].notna())

df[ambiguity_cols].head(10)


Unnamed: 0,ambiguity,final_ambiguity
0,clear_violation,clear_violation
1,clear_violation,clear_violation
2,clear_violation,clear_violation
3,clear_violation,clear_violation
4,clear_violation,clear_violation
5,gray_area,gray_area
6,clear_violation,clear_violation
7,clear_violation,clear_violation
8,no_violation,no_violation
9,clear_violation,clear_violation


## Label Expansion

Human annotations are expanded into structured binary indicators representing
harm categories.

Where disagreement exists, ambiguity indicators are preserved to avoid masking
uncertainty during evaluation.


In [None]:
# Create binary indicator columns for all labels

all_labels = [
    'toxicity',
    'severe_toxicity',
    'obscene',
    'insult',
    'threat',
    'identity_attack',
    'sexual_explicit'
]

# One 0/1 column per (label column, label) pair, named first_<label> and
# final_<label>. All first_* columns are created before the final_* ones.
indicator_cols = []
for prefix in ('first', 'final'):
    for label in all_labels:
        indicator = f'{prefix}_{label}'
        # The lambda is applied right away, so it sees the current `label`.
        df[indicator] = df[f'{prefix}_label'].apply(lambda lst: int(label in lst))
        indicator_cols.append(indicator)

# Quick check: the parsed lists next to the indicators derived from them.
# Selecting the tracked indicator names avoids repeating first_label /
# final_label and pulling in unrelated columns that merely start with
# "first_" or "final_".
df[['first_label', 'final_label'] + indicator_cols].head(10)


Unnamed: 0,first_label,final_label,first_label.1,first_rater_name,final_notes,final_label.1,final_ambiguity,first_toxicity,first_severe_toxicity,first_obscene,...,first_threat,first_identity_attack,first_sexual_explicit,final_toxicity,final_severe_toxicity,final_obscene,final_insult,final_threat,final_identity_attack,final_sexual_explicit
0,"[insult, toxicity]","[insult, toxicity]","[insult, toxicity]",mamoun,done,"[insult, toxicity]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
1,"[insult, toxicity]","[insult, toxicity]","[insult, toxicity]",mamoun,done,"[insult, toxicity]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,"[insult, toxicity]","[insult, toxicity]","[insult, toxicity]",mamoun,done,"[insult, toxicity]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
3,"[toxicity, insult]","[toxicity, insult]","[toxicity, insult]",mamoun,done,"[toxicity, insult]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
4,"[toxicity, identity_attack]","[toxicity, identity_attack]","[toxicity, identity_attack]",mamoun,done,"[toxicity, identity_attack]",clear_violation,1,0,0,...,0,1,0,1,0,0,0,0,1,0
5,"[none, toxicity]","[none, toxicity]","[none, toxicity]",mamoun,done,"[none, toxicity]",gray_area,1,0,0,...,0,0,0,1,0,0,0,0,0,0
6,"[toxicity, insult]","[toxicity, insult]","[toxicity, insult]",Ivy,done,"[toxicity, insult]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
7,"[toxicity, insult]","[toxicity, insult]","[toxicity, insult]",Ivy,done,"[toxicity, insult]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0
8,[],[],[],Ivy,done,[],no_violation,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"[toxicity, insult]","[toxicity, insult]","[toxicity, insult]",Ivy,done,"[toxicity, insult]",clear_violation,1,0,0,...,0,0,0,1,0,0,1,0,0,0


In [None]:
# Convert ambiguity text into numeric codes


ambiguity_map = {
    'no_violation': 0,
    'gray_area': 1,
    'clear_violation': 2
}

for source_col, code_col in [
    ('ambiguity', 'ambiguity_code'),
    ('final_ambiguity', 'final_ambiguity_code'),
]:
    df[code_col] = df[source_col].map(ambiguity_map)

    # Series.map yields NaN for any value that is not a key of ambiguity_map.
    # Report such rows so they do not pass through unnoticed.
    unmapped = df.loc[df[code_col].isna(), source_col]
    if not unmapped.empty:
        print(
            f"Warning: {len(unmapped)} row(s) in '{source_col}' have no ambiguity code; "
            f"unrecognized values: {sorted(unmapped.astype(str).unique())}"
        )

df[['ambiguity', 'final_ambiguity', 'ambiguity_code', 'final_ambiguity_code']].head(10)


Unnamed: 0,ambiguity,final_ambiguity,ambiguity_code,final_ambiguity_code
0,clear_violation,clear_violation,2,2
1,clear_violation,clear_violation,2,2
2,clear_violation,clear_violation,2,2
3,clear_violation,clear_violation,2,2
4,clear_violation,clear_violation,2,2
5,gray_area,gray_area,1,1
6,clear_violation,clear_violation,2,2
7,clear_violation,clear_violation,2,2
8,no_violation,no_violation,0,0
9,clear_violation,clear_violation,2,2


## Cleaning and Final Schema

This step standardizes column names, removes intermediate fields, and produces
a consistent schema suitable for downstream analysis.


In [None]:
# Remove the columns listed below from the frame.

columns_to_drop = [
    'first_label', 'ambiguity', 'comments',
    'first_rater_name', 'second_rater_name', 'second_rater_notes',
    'final_notes', 'final_label', 'final_ambiguity',
]

# errors='ignore' skips any listed column that is not present
df = df.drop(labels=columns_to_drop, axis='columns', errors='ignore')

df.head()


Unnamed: 0,article_id,created_date,publication_id,original_dominant_label,original_max_label_score,risk_tier,stratum_key,parent_id,id,parent_text,...,first_sexual_explicit,final_toxicity,final_severe_toxicity,final_obscene,final_insult,final_threat,final_identity_attack,final_sexual_explicit,ambiguity_code,final_ambiguity_code
0,381463,2017-09-24 23:13:39.809705+00:00,102,toxicity,0.787879,High-Risk,High-Risk_toxicity,6013735,6015432,Glad to see the Broncos and players all over t...,...,0,1,0,0,1,0,0,0,2,2
1,358797,2017-07-24 21:42:10.511219+00:00,102,toxicity,0.5,High-Risk,High-Risk_toxicity,5650746,5650987,You can say what you want about Obama but at l...,...,0,1,0,0,1,0,0,0,2,2
2,148987,2016-10-20 00:14:30.122014+00:00,21,toxicity,1.0,High-Risk,High-Risk_toxicity,0,7080512,,...,0,1,0,0,1,0,0,0,2,2
3,387828,2017-10-11 17:15:02.701921+00:00,54,toxicity,0.785714,High-Risk,High-Risk_toxicity,0,7115350,,...,0,1,0,0,1,0,0,0,2,2
4,165436,2017-02-01 21:28:42.239758+00:00,21,toxicity,0.4,Borderline,Borderline_toxicity,935034,935199,It's about faith\nhttps://youtu.be/viDffWUjcBA,...,0,1,0,0,0,0,1,0,2,2


In [None]:
# List the remaining columns, one "<position> <name>" pair per line

for i, col in enumerate(df.columns):
    print(f"{i} {col}")


0 article_id
1 created_date
2 publication_id
3 original_dominant_label
4 original_max_label_score
5 risk_tier
6 stratum_key
7 parent_id
8 id
9 parent_text
10 text
11 second_rater_agrees
12 first_toxicity
13 first_severe_toxicity
14 first_obscene
15 first_insult
16 first_threat
17 first_identity_attack
18 first_sexual_explicit
19 final_toxicity
20 final_severe_toxicity
21 final_obscene
22 final_insult
23 final_threat
24 final_identity_attack
25 final_sexual_explicit
26 ambiguity_code
27 final_ambiguity_code


## Ethics and Output Handling

Final labeled outputs are excluded from version control.

This repository provides construction logic only and does not redistribute
annotated content.


In [None]:
# Write the cleaned frame to an Excel workbook on Drive, without the index.

output_path = '/content/drive/MyDrive/Dat490/Dataset/golden_dataset_clean.xlsx'

df.to_excel(excel_writer=output_path, index=False)

print(f"Saved cleaned dataset to: {output_path}")


Saved cleaned dataset to: /content/drive/MyDrive/Dat490/Dataset/golden_dataset_clean.xlsx
