In [1]:
import math
import pandas as pd
import gradio as gr

from common_functions import get_latest_filename, \
    safe_flag, harm_categories, unwanted_flags

analytics_file_path = get_latest_filename('analytics', empty_ok=True)

UPDATE_MODE = False

# Columns written by the human reviewer, and the model-generated columns that
# UPDATE_MODE refreshes from the latest flagged file.
ADMIN_COLUMNS = ['admin_unsafe_tags', 'admin_unwanted_flags', 'admin_flag_reason']
FLAGGED_COLUMNS = ['text_unsafe', 'flags', 'flag_reason_short']

# Only the read of the analytics file is guarded. Previously a bare `except:`
# also wrapped the UPDATE_MODE logic, so any error there (for example a missing
# column in the flagged file) fell through to the seeding branch and overwrote
# the existing analytics file, discarding the reviewer's annotations.
try:
	df = pd.read_csv(analytics_file_path)
except (FileNotFoundError, pd.errors.EmptyDataError):
	# If the analytics file doesn't exist (or is empty), create it from the
	# latest flagged data.
	# NOTE(review): assumes get_latest_filename(..., empty_ok=True) returns a
	# usable path even when no analytics file exists yet - confirm.
	last_file_path = get_latest_filename('flagged')
	df = pd.read_csv(last_file_path)
	df.drop(columns=['url', 'domain_unsafe', 'domain_unindexed'],
			errors='ignore', inplace=True)
	for column in ADMIN_COLUMNS:
		df[column] = None
	df.to_csv(analytics_file_path, index=False)
else:
	if UPDATE_MODE:
		flagged_file_path = get_latest_filename('flagged')
		flagged_df = pd.read_csv(flagged_file_path)
		for column in FLAGGED_COLUMNS:
			df[column] = None  # Clear existing flags
		df.update(flagged_df[FLAGGED_COLUMNS])  # Update with new flagged data

# Guarantee every column the later cells read, so an analytics file written by
# an older version of this notebook does not fail with a KeyError.
for column in ['flag_reason_short'] + ADMIN_COLUMNS:
	if column not in df.columns:
		df[column] = None

# A column that is entirely empty is read back from CSV as float64; writing a
# string into it with `df.at` is deprecated in recent pandas. Keep the
# reviewer columns as generic objects so string flags can be stored.
for column in ADMIN_COLUMNS:
	df[column] = df[column].astype(object)

row_count = len(df)
df.head(2)

Unnamed: 0,id,text,text_unsafe,flags,flag_reason_short,admin_unsafe_tags,admin_unwanted_flags,admin_flag_reason
0,<urn:uuid:faff9b64-041c-4b98-8be4-7ff2a02e4b8d>,We want to know how to best serve you. Please ...,safe,safe,"Report the feedback or report, discrimination,...",safe,safe,"Report the feedback or report, discrimination,..."
1,<urn:uuid:77695799-0774-42a1-8eaa-5efbe154c4e0>,Architectural Control Committee Policies and F...,safe,safe,"Home renovations, mailboxes, and shed guidelin...",safe,safe,"Home renovations, mailboxes, and shed guidelin..."


In [2]:
def isna(val):
	"""Return True when ``val`` should be treated as a missing cell value.

	Missing means: ``None``, a float NaN, any pandas/numpy NA scalar
	(``pd.NA``, ``NaT``, ...), or any other falsy value such as ``''``, ``0``
	or an empty list.

	The NA checks run before the truthiness test because ``not pd.NA`` raises
	``TypeError``. ``pd.isna`` is only consulted for scalars because it returns
	an array for list-likes, whose truth value is ambiguous.
	"""
	if val is None:
		return True
	if isinstance(val, float) and math.isnan(val):
		return True
	if pd.api.types.is_scalar(val) and pd.isna(val):
		return True
	# Remaining values are missing only when falsy (empty string, 0, empty list).
	return not val

# Find the first row that still lacks a reviewer decision in either admin
# column; current_index is left pointing at that row.
for current_index in range(row_count):
	if isna(df.at[current_index, 'admin_unsafe_tags']) \
			or isna(df.at[current_index, 'admin_unwanted_flags']):
		break
else:
	# The loop ended without a break: every row is already reviewed, or the
	# frame is empty (in which case the loop never bound current_index and the
	# old post-loop check raised NameError). Park the cursor past the last row
	# so the `current_index >= row_count` checks treat evaluation as complete.
	current_index = row_count+1


def df_preview():
	"""Return a markdown table of the rows around the current row.

	The window spans from two rows before ``current_index`` up to (but not
	including) three rows after it, clipped to the frame. The id and
	model-generated flag columns are hidden, and ``text`` is cut to its first
	100 characters with a trailing ``'...'``.
	"""
	start_index = max(current_index - 2, 0)
	end_index = min(current_index + 3, row_count)
	df_display = df.iloc[start_index:end_index].copy()

	df_display = df_display.drop(
		columns=['id', 'text_unsafe', 'flags', 'flag_reason_short'], errors='ignore')
	# Use the .str accessor: Series.replace only matches whole cell values, so
	# it never replaced newlines embedded in the text, and those newlines
	# ended up inside the markdown table.
	shortened = df_display['text'].str.slice(0, 100)
	df_display['text'] = shortened.str.replace('\n', '<br>', regex=False) + '...'
	return df_display.to_markdown()

def get_current_value(column, original_column, return_string=False):
	"""Return the current row's value from ``column``, falling back to ``original_column``.

	Args:
		column: Preferred column (the reviewer's value).
		original_column: Column used when ``column`` is missing for this row.
		return_string: When True return the stored text unchanged; otherwise
			return the comma-separated tokens as a list.

	Returns:
		A string (``''`` when both cells are missing) if ``return_string`` is
		True, else a list of tokens with surrounding whitespace removed and
		empty tokens dropped (``[]`` when both cells are missing).
	"""
	val = None
	for source_column in (column, original_column):
		candidate = df.at[current_index, source_column]
		if not isna(candidate):
			val = candidate
			break

	if val is None:
		return '' if return_string else []

	# Cells read back from CSV may be numeric; coerce so .split cannot fail.
	text = str(val)
	if return_string:
		return text
	# Strip each token so values stored as "a, b" still match option names.
	return [token.strip() for token in text.split(',') if token.strip()]

def get_current_harmful_values():
	"""Return the current row's harm categories as ``'key: description'`` labels.

	Keys come from ``admin_unsafe_tags`` (falling back to ``text_unsafe``).
	The safe flag and any key that is not in ``harm_categories`` are left out:
	labels are built in the same ``'key: value'`` form as the dropdown choices,
	so a label for an unknown or whitespace-padded key could never match a
	choice, and evaluate_text discards such keys on submit anyway.
	"""
	keys = [key.strip() for key in get_current_value('admin_unsafe_tags', 'text_unsafe')]
	return [
		f'{key}: {harm_categories[key]}'
		for key in keys
		if key != safe_flag and key in harm_categories
	]

In [3]:
# Module-level flag flipped by editing(); no other visible code reads it.
editing_mode = True

def evaluate_text(harmful_flags_input, unwanted_flags_input, flag_reason_short):
	"""Store the reviewer's decision for the current row and move to the next one.

	Args:
		harmful_flags_input: Selected harm labels in ``'key: description'`` form
			(may be empty or None).
		unwanted_flags_input: Selected unwanted-flag names (may be empty or None).
		flag_reason_short: Free-text reason; stored as ``''`` when empty.

	Returns:
		A 5-tuple: heading text, dataframe preview, then the values for the
		harmful-flags, unwanted-flags and reason inputs. Once no rows remain
		the last three are ``gr.update(interactive=False)``.
	"""
	global current_index

	def completed():
		# Outputs used whenever there is nothing left to review.
		return '## Evaluation complete!', df_preview(), gr.update(interactive=False), \
				gr.update(interactive=False), gr.update(interactive=False)

	if current_index >= row_count:
		return completed()

	# Harm labels look like "key: description"; keep only known keys.
	harmful_keys = []
	for label in harmful_flags_input or []:
		key = label.strip().split(':')[0].strip()
		if key in harm_categories:
			harmful_keys.append(key)
	df.at[current_index, 'admin_unsafe_tags'] = ','.join(harmful_keys) or safe_flag

	# Unwanted flags are plain names; keep only known ones.
	unwanted_names = []
	for name in unwanted_flags_input or []:
		name = name.strip()
		if name in unwanted_flags:
			unwanted_names.append(name)
	df.at[current_index, 'admin_unwanted_flags'] = ','.join(unwanted_names) or safe_flag

	df.at[current_index, 'admin_flag_reason'] = flag_reason_short if flag_reason_short else ''

	# Persist after every submission, then advance the cursor.
	df.to_csv(analytics_file_path, index=False)
	current_index += 1

	if current_index >= row_count:
		# Save backup. Human effort can't be reproduced easily and shouldn't be lost.
		df.to_csv(analytics_file_path+'.bak', index=False)
		return completed()

	heading = f'Text {current_index}: {df.at[current_index, "text"]}'
	preview = df_preview()
	harmful_value = get_current_harmful_values()
	unwanted_value = get_current_value('admin_unwanted_flags', 'flags')
	reason_value = get_current_value('admin_flag_reason', 'flag_reason_short', return_string=True)
	return heading, preview, harmful_value, unwanted_value, reason_value


def editing():
	"""Rewind the cursor to the first row and flip the module-level editing flag."""
	global current_index, editing_mode
	current_index = 0
	editing_mode = not editing_mode


# Build the Gradio page. The branch below is chosen once, when this cell runs,
# from the value of current_index at that moment.
with gr.Blocks() as app:
	gr.Markdown('# Unwanted Text Flags Evaluation')
	if current_index >= row_count:
		# Every row already has admin flags: show the completion banner, a
		# preview of the frame and an Edit button.
		gr.Markdown('## Evaluation complete!')
		df_output = gr.Markdown(label='DataFrame Preview', value=df_preview())
		edit_button = gr.Button('Edit')
		# NOTE(review): the handler is registered without inputs or outputs, so
		# editing() only changes module state; no component is refreshed by it.
		edit_button.click(
			editing,
		)

	else:
		# Review form for the row at current_index.
		text_output = gr.Markdown(value=f'Text {current_index}: {df.at[current_index, "text"]}')

		# Choices are "key: value" labels; evaluate_text recovers the key by
		# splitting each selected label on ':'.
		harm_categories_keys = [f'{key}: {value}' for key, value in harm_categories.items()]
		harmful_flags_input = gr.Dropdown(label='Is the text unsafe?',
			choices=harm_categories_keys, multiselect=True, 
			value=get_current_harmful_values(),
		)
		# Pre-filled from the admin column, falling back to the 'flags' column.
		unwanted_flags_input = gr.Dropdown(label='Unwanted flags?',
			choices=unwanted_flags, multiselect=True, 
			value=get_current_value('admin_unwanted_flags', 'flags'),
		)
		flag_reason_short = gr.Textbox(label='Flag reason (short)',
			value=get_current_value('admin_flag_reason', 'flag_reason_short', return_string=True),
		)
		submit_button = gr.Button('Submit')
		df_output = gr.Markdown(label='DataFrame Preview', value=df_preview())
		# evaluate_text returns five values, one per component listed in outputs.
		submit_button.click(
			evaluate_text, inputs=[harmful_flags_input, unwanted_flags_input, flag_reason_short], 
			outputs=[text_output, df_output, harmful_flags_input, unwanted_flags_input, flag_reason_short]
		)

# Start the Gradio server for the page built above.
app.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


