In [None]:
import pandas as pd

from common_functions import get_latest_filename, \
    unwanted_flags, harm_categories, safe_flag

analytics_file_path = get_latest_filename('analytics', empty_ok=True)

# Load the analytics file; if it cannot be read, create it from the latest
# flagged data and add the two (initially empty) user-label columns.
try:
	df = pd.read_csv(analytics_file_path)
except (OSError, ValueError):
	# OSError: the file is missing or unreadable.
	# ValueError: invalid path object (e.g. None) or pandas' EmptyDataError.
	# Anything else (KeyboardInterrupt, programming errors) should surface
	# instead of silently rebuilding the analytics file.
	last_file_path = get_latest_filename('flagged')
	df = pd.read_csv(last_file_path)
	df.drop(columns=['url', 'domain_unsafe', 'domain_unindexed'],
						errors='ignore', inplace=True)
	df['user_unsafe_tags'] = None
	df['user_unwanted_flags'] = None
	df.to_csv(analytics_file_path, index=False)

# A column with no values yet is parsed back from CSV as float64; writing
# string flags into it later would be an incompatible-dtype assignment.
# Keeping the label columns as object dtype makes both load paths behave alike.
for label_column in ('user_unsafe_tags', 'user_unwanted_flags'):
	if label_column in df.columns:
		df[label_column] = df[label_column].astype(object)

df.head(2)

In [None]:
import gradio as gr

# Register True as an accepted key of harm_categories.
# NOTE(review): presumably so boolean values pass the membership check used
# when validating entered flags -- confirm against common_functions.
harm_categories[True] = True
row_count = len(df)

# Find the first row with missing flags. When every row is already labelled
# (or the frame is empty) fall back to row_count, which the rest of the cell
# treats as "evaluation complete"; a plain for-loop would instead stop on the
# last row, or leave current_index undefined for an empty frame.
current_index = next(
	(
		row for row in range(row_count)
		if pd.isna(df.at[row, 'user_unsafe_tags'])
		or pd.isna(df.at[row, 'user_unwanted_flags'])
	),
	row_count,
)

def df_preview():
	"""Render a markdown table of the rows around the current row.

	Shows up to two rows before and two rows after ``current_index``
	(clamped to the frame bounds), without the 'id', 'text_unsafe' and
	'flags' columns, and with 'text' cut to 100 characters.

	Returns:
		The markdown produced by ``DataFrame.to_markdown()``.
	"""
	start_index = max(current_index - 2, 0)
	end_index = min(current_index + 3, row_count)
	df_display = df.iloc[start_index:end_index].copy()

	df_display.drop(columns=['id', 'text_unsafe', 'flags'], errors='ignore', inplace=True)
	# Use the .str accessor for the newline substitution: Series.replace only
	# swaps cells whose whole value equals '\n', so embedded newlines would
	# survive and break the markdown table rows.
	df_display['text'] = (
		df_display['text'].str.slice(0, 100).str.replace('\n', '<br>', regex=False) + '...'
	)
	return df_display.to_markdown()

# Sentinel returned by _parse_flags when the user asked to stop evaluating.
_EXIT_REQUESTED = object()


def _parse_flags(raw_input, allowed_flags):
	"""Turn one textbox entry into the flag string to store, or _EXIT_REQUESTED."""
	cleaned = raw_input.strip().upper()  # s1 to S1
	if not cleaned or cleaned.startswith('N'):  # safe if empty or N
		return safe_flag
	if cleaned.startswith('X'):  # exit
		return _EXIT_REQUESTED
	accepted = [
		flag for flag in (part.strip() for part in cleaned.split(','))
		if flag in allowed_flags
	]
	# Default to the safe flag if nothing entered is a known flag.
	return ','.join(accepted) or safe_flag


def _locked_response(message):
	"""Build the 4-tuple of outputs that shows `message` and disables both textboxes."""
	return message, df_preview(), gr.update(interactive=False), gr.update(interactive=False)


def evaluate_text(harmful_flags_input, unwanted_flags_input):
	"""Store the user's flags for the current row and advance to the next one.

	Each input is a comma-separated list of flags; an empty value or one
	starting with 'n' means safe, one starting with 'x' ends the session.
	Only columns that are still missing for the current row are written.
	The frame is saved to ``analytics_file_path`` after each labelled row.

	Args:
		harmful_flags_input: Text entered for the unsafe-category flags.
		unwanted_flags_input: Text entered for the unwanted flags.

	Returns:
		A 4-tuple matching the click outputs: the markdown for the text
		panel, the preview table markdown, and one ``gr.update`` per textbox.
	"""
	global current_index
	if current_index >= row_count:
		return _locked_response('## Evaluation complete!')

	pending = (
		('user_unsafe_tags', harmful_flags_input, harm_categories),
		('user_unwanted_flags', unwanted_flags_input, unwanted_flags),
	)
	for column, raw_input, allowed_flags in pending:
		if not pd.isna(df.at[current_index, column]):
			continue  # already labelled, keep the stored value
		parsed = _parse_flags(raw_input, allowed_flags)
		if parsed is _EXIT_REQUESTED:
			current_index = row_count  # Set to end to trigger completion
			return _locked_response('## Evaluation ended by user.')
		df.at[current_index, column] = parsed

	df.to_csv(analytics_file_path, index=False)
	current_index += 1
	# The row just labelled may have been the last one; looking up the next
	# row's text would then raise KeyError, so report completion instead.
	if current_index >= row_count:
		return _locked_response('## Evaluation complete!')
	return f'Text {current_index}: {df.at[current_index, "text"]}', df_preview(), gr.update(interactive=True), gr.update(interactive=True)

def _first_present(*candidates):
	"""Return the first candidate that is neither missing (None/NaN) nor falsy, else ''."""
	for candidate in candidates:
		# NaN is truthy, so a plain `a or b` chain would stop on a missing
		# value read back from CSV; check for NA explicitly.
		if not pd.isna(candidate) and candidate:
			return candidate
	return ''


# Build the evaluation UI: a completion notice when nothing is left to label,
# otherwise the current text, two flag textboxes and a submit button.
with gr.Blocks(title='Text Safety Evaluation') as app:
	gr.Markdown('# Text Safety Evaluation')
	if current_index >= row_count:
		gr.Markdown('## Evaluation complete!')
		df_output = gr.Markdown(label='DataFrame Preview', value=df_preview())
	else:
		text_output = gr.Markdown(value=f'Text {current_index}: {df.at[current_index, "text"]}')
		gr.Markdown('**Enter flags comma-separated or "n" for safe, "x" to exit**')
		# Pre-fill each textbox with the user's stored value if there is one,
		# otherwise with the value from the corresponding source column.
		harmful_flags_input = gr.Textbox(label='Is the text unsafe?',
			value=_first_present(df.at[current_index, 'user_unsafe_tags'], df.at[current_index, 'text_unsafe'])
		)
		unwanted_flags_input = gr.Textbox(label='Unwanted flags?',
			value=_first_present(df.at[current_index, 'user_unwanted_flags'], df.at[current_index, 'flags'])
		)
		submit_button = gr.Button('Submit')
		df_output = gr.Markdown(label='DataFrame Preview', value=df_preview())
		submit_button.click(
			evaluate_text, inputs=[harmful_flags_input, unwanted_flags_input],
			outputs=[text_output, df_output, harmful_flags_input, unwanted_flags_input]
		)

app.launch()