# Prototype Sample Size extraction using GPT

Goal: Test prompt engineering on the easiest use case: single-group studies whose sample size is reported in the abstract.

In [1]:
import pandas as pd
from labelrepo.projects.participant_demographics import (
    get_participant_demographics,
)

# Load the annotations exposed by labelrepo's participant_demographics project.
# The cells below read its project_name, annotator_name, pmcid and count columns.
subgroups = get_participant_demographics()

In [2]:
# Abstract text per paper; later cells use its `pmcid` and `abstract` columns.
abstracts = pd.read_csv('data/abstracts.csv')

In [3]:
# Restrict to one annotator's labels within the participant_demographics project.
from_pd_project = subgroups.project_name == 'participant_demographics'
by_jerome = subgroups.annotator_name == 'Jerome_Dockes'
jerome_pd = subgroups[from_pd_project & by_jerome]

In [4]:
# Tally annotated rows per paper. After groupby(...).count() every remaining
# column holds its number of non-null entries for that pmcid, so the 'count'
# column tested here is a row tally rather than a sample size.
counts = jerome_pd.groupby('pmcid').count().reset_index()
has_single_row = counts['count'] == 1
single_group_pmcids = counts.loc[has_single_row, 'pmcid']

### Single group with Sample Size in Abstract

In [5]:
# Keep only the annotation rows belonging to single-group papers.
is_single_group = jerome_pd.pmcid.isin(single_group_pmcids)
single_group = jerome_pd[is_single_group]

In [6]:
# Attach each paper's abstract; the join uses whichever columns the two frames share.
matching_abstracts = abstracts[abstracts.pmcid.isin(single_group.pmcid)]
single_group = single_group.merge(matching_abstracts)

In [7]:
# Regex heuristic originally used to flag papers whose annotated sample size
# appears in the abstract (the count preceded by a non-digit character).
# Kept for reference only: the hand-checked list in the next cell replaces it.
# Re-enabling it requires `import re`.
# single_group['in_abstract'] = \
#  single_group.apply(lambda x: (re.search(f"\D{x['count']}", x.abstract) != None), axis = 1)

In [8]:
# Hand-checked pmcids of papers whose annotated sample size does not appear
# in the abstract.
not_in_abstract = [
    5548834, 4029023, 4318429, 5324609, 8752963,
    5218407, 3147157, 2775905, 6344389, 8837589,
    7430162, 7563756, 7156375, 4330553, 6989437,
    6328158, 3409150, 3775427, 3483694, 6787094,
    6528067, 3869649, 3183226, 6868994, 7002496,
    6492297, 3780305,
]

In [9]:
# A paper is flagged as "in abstract" unless it is on the manual exclusion list.
single_group['in_abstract'] = ~single_group.pmcid.isin(not_in_abstract)

## Extract simple sample size

In [10]:
import openai
from templates import ZERO_SHOT_SINGLE_GROUP_DEMOGRAPHICS
from extract import extract_from_multiple

# Read the OpenAI API key from a local key file, stripping surrounding
# whitespace. The context manager closes the file handle as soon as the key
# has been read instead of leaving it open until garbage collection.
with open('/home/zorro/.keys/open_ai.key') as key_file:
    openai.api_key = key_file.read().strip()

In [11]:
# Run the zero-shot single-group prompt over every abstract in parallel.
# The original call was missing its closing parenthesis.
# The saved run of this cell hit an OpenAI RateLimitError (tokens per minute)
# with 4 workers; lowering num_workers may help if that recurs.
predictions = extract_from_multiple(
    single_group.abstract.to_list(),
    **ZERO_SHOT_SINGLE_GROUP_DEMOGRAPHICS,
    num_workers=4,
)

 60%|█████████████████████████▊                 | 45/75 [00:07<00:05,  5.85it/s]


RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-PtkFiGSkwXgt54GwiYiGgbmy on tokens per min. Limit: 90000 / min. Current: 89604 / min. Contact us through our help center at help.openai.com if you continue to have issues.

In [None]:
# Bring both count columns to a common string representation, then mark each
# prediction as correct when it equals the annotated count.
# NOTE(review): the pmcid copy and the comparison rely on `predictions` and
# `single_group` sharing the same row index — confirm.
predictions['pmcid'] = single_group['pmcid']
for frame in (predictions, single_group):
    frame['count'] = frame['count'].astype('str')

predictions['correct'] = single_group['count'] == predictions['count']

### Only look at those with sample size in abstract

In [None]:
# Predictions for the papers flagged as having the sample size in the abstract.
abstract_mask = single_group['in_abstract']
in_abstract_predictions = predictions[abstract_mask]

In [None]:
# Share of in-abstract predictions that exactly match the annotated count.
# This is a fraction between 0 and 1, not a percentage.
in_abstract_predictions['correct'].mean()

In [None]:
# Rows where the extracted count differs from the annotation.
wrong_rows = ~in_abstract_predictions['correct']
in_abstract_predictions[wrong_rows]

Notes: First error is due to pulling out sample size for behavioral task, not fMRI task. 

Second error is due to study being a single subject study.

### Any hallucinations?

In [None]:
# Predictions for the papers flagged as NOT having the sample size in the abstract.
missing_from_abstract = ~single_group['in_abstract']
no_abstract_predictions = predictions[missing_from_abstract]

In [None]:
# Share of not-in-abstract papers handled acceptably: the model either answered
# `n/a` or still produced the annotated count (i.e. the paper was misclassified
# as not having the info in its abstract). The original expression only tested
# for `n/a`, which did not match this stated intent; the remainder of this
# share is exactly the wrong, non-`n/a` answers.
declined = no_abstract_predictions['count'] == 'n/a'
matched = no_abstract_predictions['correct']
(declined | matched).mean()

In [None]:
# Not-in-abstract papers where the model returned a value other than `n/a`
# and that value does not match the annotation.
gave_a_value = no_abstract_predictions['count'] != 'n/a'
is_wrong = ~no_abstract_predictions['correct']
no_abstract_predictions[gave_a_value & is_wrong]

Overall notes: 

Errors:
- 2 errors due to a discrepancy between the methods and abstract counts (final n vs. starting n)
- 1 `n/a` prediction should have been 1 (single-subject study)
- 1 was for behavioral count, not fMRI count

False positives:
- Age was extracted instead of sample size


Excluding abstract/full-text discrepancies, error rate is 3/75

### View incorrect abstracts

In [None]:
# Pick one paper and pull its abstract, dropping any non-ASCII characters.
pmcid = 3780305
raw_abstract = abstracts.loc[abstracts.pmcid == pmcid, 'abstract'].tolist()[0]
text = raw_abstract.encode("ascii", "ignore").decode()

In [None]:
# Display the ASCII-only abstract text for manual inspection.
text

In [None]:
# Show the annotation row(s) for the same paper to compare against the abstract.
single_group[single_group.pmcid == pmcid]