In [1]:
# Imports
import time
import os
import glob
import csv
import numpy as np
import pandas as pd
from scipy import stats

# File locations
dir = os.getcwd()
out_dir = os.path.join(dir, 'output')
fig_dir = os.path.join(dir, 'figures')

In [2]:
def check_if_good(all):

    new_all = []
    good = ''
    for x in all:
        if not pd.isna(x):
            good = x
        new_all.append(good)

    return new_all

In [3]:
prompts = []
for n in range(0,100):
    prompts.extend(['no_prompt','doc_prompt','pat_prompt'])

# Endo sheet

In [4]:
main_f = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator2 - Endometriosis Questions.csv')
main_df = pd.read_csv(main_f, header=1, usecols=[0,1,2,7]).iloc[:300]
new_ids = check_if_good(main_df['question_id']) 
main_df['question_id'] = new_ids
new_qs = check_if_good(main_df['question']) 
main_df['question'] = new_qs
main_df['response_id'] = [f'endoR{n}' for n in range(0,300)]
main_df['prompt'] = prompts
main_df['type'] = 'endo'
main_df[:5]

Unnamed: 0,question_id,question,response,Cut off,response_id,prompt,type
0,Endo_d8bcw7_post,I had a lap on 5/10/19 and my gyno said my bla...,"as an ai, i don't have the ability to diagnose...",False,endoR0,no_prompt,endo
1,Endo_d8bcw7_post,I had a lap on 5/10/19 and my gyno said my bla...,"your symptoms of frequent urination, persisten...",False,endoR1,doc_prompt,endo
2,Endo_d8bcw7_post,I had a lap on 5/10/19 and my gyno said my bla...,"yes, it's very possible that the scar tissue o...",False,endoR2,pat_prompt,endo
3,endometriosis_dn70ay_post,Hello. Where do I start? I had bad cramping ...,Error The response was filtered due to the pro...,False,endoR3,no_prompt,endo
4,endometriosis_dn70ay_post,Hello. Where do I start? I had bad cramping ...,Error The response was filtered due to the pro...,False,endoR4,doc_prompt,endo


## How many answers were cutoff?

In [5]:
print('Number of cutoff responses with no prompt: ', len(main_df.loc[(main_df['Cut off'] == True) & (main_df['prompt'] == 'no_prompt')]))
print('Number of cutoff responses with doctor prompt: ', len(main_df.loc[(main_df['Cut off'] == True) & (main_df['prompt'] == 'doc_prompt')]))
print('Number of cutoff responses with patient prompt: ', len(main_df.loc[(main_df['Cut off'] == True) & main_df['prompt'] == 'pat_prompt']))

Number of cutoff responses with no prompt:  23
Number of cutoff responses with doctor prompt:  2
Number of cutoff responses with patient prompt:  0


## How many answers were stigmatizing?

In [6]:
check_f = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator2 - Endometriosis Questions.csv')
check_df = pd.read_csv(check_f, header=1, usecols=[0,1,2,6,7]).iloc[:300]
print(check_df[check_df['Stigmatizing'] == True]['response'].values[0])

i'm an ai developed by openai and i can provide information based on general knowledge; however, keep in mind that you should always seek personalized advice from your healthcare provider since every person's case may be different.

research suggests lifestyle changes and natural remedies may potentially help to manage ovarian cysts, although the scientific evidence is limited and varies for each method. here are some methods that some people have found helpful:

1. **healthy diet**: eating a well-balanced diet rich in fruits, vegetables, lean proteins, and whole grains may support overall health and help manage symptoms.

2. **regular exercise:** regular physical activity can help regulate the hormones and promote general well-being.

3. **stress management:** some women have found techniques like yoga, meditation, or massage helpful in managing the level of stress, which can influence hormonal balance.

4. **maintaining a healthy weight:** too much body fat can cause hormonal changes

## Questions that raised errors to remove

In [7]:
error = [169,170]
for index, row in main_df.iterrows():
    if 'Error' in row['response']:
        error.append(index)
error

[169, 170, 3, 4, 5, 168, 207, 208, 209]

## Add patient ratings

In [8]:
# annotations_d = main_df.drop(error).drop(columns=['Cut off']).to_dict(orient='records')

# for annotator in range(2,4):
#     file = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator{annotator} - Endometriosis Questions.csv')
#     df = pd.read_csv(file, header=1, usecols=range(0,7)).iloc[:300]

#     for index, row in df.iterrows():
#         for response in annotations_d:
#             if response['response'] == row['response']:
#                 for column in df.columns[3:6]:
#                     key = f'Annotator {annotator} - {column}'
#                     response[key] = int(row[column][0])

## Add physician ratings

In [9]:
annotations_d = main_df.drop(error).drop(columns=['Cut off']).to_dict(orient='records')

file = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator1 - Endometriosis Questions.csv')
ph_df = pd.read_csv(file, header=1,usecols=range(0,4)).iloc[:301]
new_ids = check_if_good(ph_df['question_id']) 
ph_df['question_id'] = new_ids

for index, row in ph_df.iterrows():
    for response in annotations_d:
        if response['response'] == row['response'] :
            for column in ph_df.columns[3:4]:
                key = f'Specialist - {column}'
                response[key] = int(row[column][0])

# General sheet 

In [10]:
gen_f = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator2 - General Questions.csv')
gen_df = pd.read_csv(gen_f, header=1, usecols=[0,1,2,6]).iloc[:100]
gen_df['response_id'] = [f'genR{n}' for n in range(0,100)]
gen_df['prompt'] = 'no_prompt'
gen_df['type'] = 'general'
gen_df[:5]

Unnamed: 0,question_id,question,response,Cut off,response_id,prompt,type
0,N2,"28F, ex smoker, no drinking, 5'3"", 200lbs. Its...",it could be due to several things such as musc...,False,genR0,no_prompt,general
1,N3,"3M, average height and weight, white, no medic...","as an ai model, i can't physically see or diag...",True,genR1,no_prompt,general
2,N5,I have health anxiety and just a bunch of anxi...,i'm an ai developed by openai and not a profes...,False,genR2,no_prompt,general
3,N6,"If I feel heart flutters about once a day, and...",many people experience occasional heart palpit...,True,genR3,no_prompt,general
4,N7,39M. 260 lbs. Eastern-European ancestry. I'...,while the presence of hair in the ear canal mi...,False,genR4,no_prompt,general


## Questions that raised errors to remove

In [11]:
error_gen = []
for index, row in gen_df.iterrows():
    if 'Error' in row['response']:
        error_gen.append(index)
error_gen

[16, 75, 79]

## Add patient ratings

In [12]:
# general_d = gen_df.drop(error_gen).drop(columns=['Cut off']).to_dict(orient='records')

# for annotator in range(2,4):
#     file = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator{annotator} - General Questions.csv')
#     df = pd.read_csv(file, header=1, usecols=range(0,7)).iloc[:100]

#     for index, row in df.iterrows():
#         for response in general_d:
#             if response['response'] == row['response']:
#                 for column in df.columns[3:6]:
#                     key = f'Annotator {annotator} - {column}'
#                     response[key] = int(row[column][0])

## Add physician ratings

In [13]:
general_d = gen_df.drop(error_gen).drop(columns=['Cut off']).to_dict(orient='records')

for annotator in range(2,4):
    file = os.path.join(out_dir, 'annotated', f'responses_evaluation_annotator1 - General Questions.csv')
    df = pd.read_csv(file, header=1, usecols=range(0,4)).iloc[:100]

    for index, row in df.iterrows():
        for response in general_d:
            if response['response'] == row['response']:
                for column in df.columns[3:4]:
                    key = f'Specialist - {column}'
                    response[key] = int(row[column][0])
                            



In [14]:
all_d = annotations_d+general_d
len(all_d)

388

In [15]:
annotations_df = pd.DataFrame(all_d)
print(len(annotations_df))
annotations_df[325:330]

388


Unnamed: 0,question_id,question,response,response_id,prompt,type,Specialist - Information Quality
325,N56,"26F, 177cm, 55kg, White. I noticed this bump b...","as an ai developed by openai, i'm not capable ...",genR35,no_prompt,general,5
326,N57,26 year old male 5‰Ûª8 feet 145 pounds \n\n\nI...,i'm an artificial intelligence and not a docto...,genR36,no_prompt,general,3
327,N59,please.. is this concerning\n\nShould I be con...,"i'm an ai language model, not a doctor, but i ...",genR37,no_prompt,general,2
328,N60,I see that this post might be a lot less extre...,you should consider investing in a finger spli...,genR38,no_prompt,general,3
329,N61,"22F, 5‰Ûª6‰Û�,207lbs, caucausian. One time eve...","i'm an ai and not a doctor, but these symptoms...",genR39,no_prompt,general,2


In [16]:
# annotations_df.to_csv(os.path.join(out_dir, 'patient_annotations.csv'))
annotations_df.to_csv(os.path.join(out_dir, 'specialist_annotations.csv'))