# Prototype Sample Size extraction using GPT

Goal: Test prompt engineering on the easiest use case: single-group studies whose sample size is reported in the abstract.

In [1]:
import pandas as pd
from labelrepo.projects.participant_demographics import (
    get_participant_demographics,
)

# Load the participant-demographics annotations and show the first row to see
# which fields each annotated subgroup carries.
subgroups = get_participant_demographics()
subgroups.iloc[0]

group_name                              patients
subgroup_name                                 va
project_name                          autism_mri
annotator_name                     David_Kennedy
pmcid                                    9230060
diagnosis         criminals committing affective
count                                          6
male count                                   6.0
age mean                                     NaN
female count                                 NaN
age minimum                                  NaN
age maximum                                  NaN
age median                                   NaN
Name: 0, dtype: object

In [2]:
# Abstract text table; later cells use its `pmcid` and `abstract` columns.
# The path is relative to the notebook's working directory.
abstracts = pd.read_csv('data/abstracts.csv')

In [3]:
# Keep only one annotator's rows from the participant_demographics project.
jerome_pd = subgroups.loc[
    subgroups['project_name'].eq('participant_demographics')
    & subgroups['annotator_name'].eq('Jerome_Dockes')
]

In [4]:
# Example of a study annotated with more than one subgroup.
jerome_pd.loc[jerome_pd['pmcid'] == 3002948]

Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median
5,healthy,phobic prone,participant_demographics,Jerome_Dockes,3002948,,15,9.0,39.2,6.0,,,
6,healthy,eating disorders prone,participant_demographics,Jerome_Dockes,3002948,,15,10.0,34.4,5.0,,,


In [5]:
# Per-study tally of annotated rows. Note that `.count()` counts non-null
# values per column, so the `count` column here holds the number of rows whose
# annotated sample size is present.
counts = jerome_pd.groupby('pmcid').count().reset_index()
# Studies with exactly one such row are treated as single-group studies.
single_group_pmcids = counts.loc[counts['count'].eq(1), 'pmcid']

### Single group with SS in Abstract

In [6]:
# Annotation rows belonging to single-group studies only.
single_group = jerome_pd.loc[jerome_pd['pmcid'].isin(single_group_pmcids)]

In [7]:
# Display the single-group annotation rows selected above.
single_group

Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median
11,healthy,_,participant_demographics,Jerome_Dockes,7989009,,29,16.0,23.60,13.0,21.0,33.0,
12,healthy,_,participant_demographics,Jerome_Dockes,5776089,,21,21.0,25.77,,20.0,40.0,
13,healthy,_,participant_demographics,Jerome_Dockes,4029023,,10,7.0,27.72,3.0,,,
14,healthy,_,participant_demographics,Jerome_Dockes,4318429,,18,9.0,,9.0,20.0,32.0,
17,patients,_,participant_demographics,Jerome_Dockes,9308181,tuberous sclerosis complex,173,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,patients,_,participant_demographics,Jerome_Dockes,6667657,chronic tinnitus,12,,,,,,
246,healthy,_,participant_demographics,Jerome_Dockes,7002496,,20,10.0,22.30,10.0,19.0,28.0,
247,healthy,_,participant_demographics,Jerome_Dockes,3676955,,33,17.0,25.30,16.0,,,
248,healthy,HCP,participant_demographics,Jerome_Dockes,6492297,,820,,,,,,


In [8]:
# Attach abstract text to each single-group study. No `on=` is given, so pandas
# joins on whichever column names the two frames share (inner join).
single_group = single_group.merge(
    abstracts.loc[abstracts['pmcid'].isin(single_group['pmcid'])]
)

In [9]:
# Heuristic for finding studies whose sample size appears in the abstract.
# Superseded by the fixed list below, which was built after manual checking.
# single_group['in_abstract'] = \
#  single_group.apply(lambda x: (re.search(f"\D{x['count']}", x.abstract) != None), axis = 1)

In [10]:
# PMCIDs of studies whose sample size is NOT stated in the abstract.
not_in_abstract = [
    4029023, 4318429, 5324609, 8752963, 5218407, 3147157, 2775905,
    6344389, 8837589, 7430162, 7563756, 7156375, 4330553, 6989437,
    6328158, 3409150, 3775427, 3483694, 6787094, 6528067, 3869649,
    3183226, 6868994, 7002496, 6492297, 3780305,
]


In [11]:
# Flag studies whose sample size is stated in the abstract, i.e. every study
# that is absent from the manually curated exclusion list.
single_group['in_abstract'] = ~single_group['pmcid'].isin(not_in_abstract)

## Extract simple sample size

In [12]:
import openai
from templates import ZERO_SHOT_SINGLE_GROUP_DEMOGRAPHICS
from extract import extract_from_multiple

# Prefer the OPENAI_API_KEY environment variable so the notebook is not tied to
# one machine's home directory; fall back to the original local key file.
# Reading inside `with` closes the file handle, which the previous bare
# open(...).read() left open.
import os

if os.environ.get('OPENAI_API_KEY'):
    openai.api_key = os.environ['OPENAI_API_KEY'].strip()
else:
    with open('/home/zorro/.keys/open_ai.key') as key_file:
        openai.api_key = key_file.read().strip()

In [13]:
# Run the zero-shot single-group extraction over every abstract, in the row
# order of `single_group`; the template dict supplies the keyword arguments.
predictions = extract_from_multiple(
    single_group['abstract'].to_list(),
    **ZERO_SHOT_SINGLE_GROUP_DEMOGRAPHICS,
)

100%|███████████████████████████████████████████| 75/75 [00:34<00:00,  2.14it/s]


In [65]:
# Normalize values
# NOTE(review): the assignments below align on index labels, so this assumes
# `predictions` has the same index as `single_group` — confirm against
# extract_from_multiple.
predictions['pmcid'] = single_group['pmcid']
# Keep both count columns as strings: later cells test `== 'n/a'` on them.
predictions['count'] = predictions['count'].astype('str')
single_group['count'] = single_group['count'].astype('str')

# Score numerically rather than by string equality: a float-typed prediction
# stringifies as '69.0' and would never equal the annotated '69'. Values that
# are not numbers ('n/a', 'nan', free text) coerce to NaN, and NaN never
# compares equal, so they are scored as incorrect.
predictions['correct'] = (
    pd.to_numeric(single_group['count'], errors='coerce')
    == pd.to_numeric(predictions['count'], errors='coerce')
)

In [66]:
# Overall accuracy: a prediction counts as acceptable when the model either
# answered `n/a` or returned the annotated count.
(predictions['count'].eq('n/a') | predictions['correct']).mean()

0.9466666666666667

### Only look at those with sample size in abstract

In [69]:
# Predictions for studies flagged as having the sample size in the abstract.
# The boolean mask comes from `single_group`, so this relies on both frames
# sharing the same index labels — NOTE(review): confirm.
in_abstract_predictions = predictions[single_group['in_abstract']]

In [70]:
# Fraction (0-1) of in-abstract studies whose predicted count matched the
# annotated count.
in_abstract_predictions['correct'].mean()

0.8979591836734694

In [71]:
# Rows where the predicted count did not match the annotation.
in_abstract_predictions[~in_abstract_predictions['correct']]

Unnamed: 0,count,pmcid,correct
14,69.0,5460048,False
26,,8978988,False
40,,5548834,False
56,79.0,6678781,False
74,27.0,6509414,False


Notes: First error is due to pulling out sample size for behavioral task, not fMRI task. 

Second error is due to study being a single subject study.

Notes:
- 2 errors were due to a discrepancy between the methods count and the abstract count (final n vs. starting n)
- 0 should have been `n/a`
- 1 was the behavioral count, not the fMRI count
- 1 was plainly incorrect (age instead of sample size)

### Any hallucinations?

In [72]:
# Predictions for studies whose abstract does not state the sample size.
no_abstract_predictions = predictions[~single_group['in_abstract']]

In [73]:
# Share of not-in-abstract studies handled acceptably: the model either
# predicted `n/a` or still returned the annotated count (i.e. the study was
# misclassified as not having the info in its abstract). The correct-answer
# branch was described here but missing from the expression; it is the exact
# complement of the "wrong and not `n/a`" filter used in the next cell.
(
    no_abstract_predictions['count'].eq('n/a')
    | no_abstract_predictions['correct']
).mean()

0.9615384615384616

In [74]:
# Hallucination candidates: the model returned a value other than `n/a`, and
# that value did not match the annotated count.
no_abstract_predictions[
    no_abstract_predictions['count'].ne('n/a')
    & ~no_abstract_predictions['correct']
]

Unnamed: 0,count,pmcid,correct
60,7,3780305,False


Notes: No false negatives (i.e. every study the model marked as `n/a` had no sample size information in its abstract).

### View incorrect abstracts

In [46]:
# Study to inspect.
pmcid = 4075342
# Take that study's first abstract and drop any non-ASCII characters so the
# text prints cleanly.
text = (
    abstracts.loc[abstracts['pmcid'] == pmcid, 'abstract']
    .tolist()[0]
    .encode("ascii", "ignore")
    .decode()
)

In [47]:
# Show the ASCII-only abstract text for the selected study.
text

' \n## Objectives \n  \nPrevious fMRI studies have demonstrated that glucose decreases the hypothalamic BOLD response in humans. However, the mechanisms underlying the CNS response to glucose have not been defined. We recently demonstrated that the slowing of gastric emptying by glucose is dependent on activation of the gut peptide cholecystokinin (CCK ) receptor. Using physiological functional magnetic resonance imaging this study aimed to determine the whole brain response to glucose, and whether CCK plays a central role. \n\n\n## Experimental design \n  \nChanges in blood oxygenation level-dependent (BOLD) signal were monitored using fMRI in 12 healthy subjects following intragastric infusion (250ml) of: 1M glucose+predosing with dexloxiglumide (CCK  receptor antagonist), 1M glucose+placebo, or 0.9% saline (control)+placebo, in a single-blind, randomised fashion. Gallbladder volume, blood glucose, insulin, and GLP-1 and CCK concentrations were determined. Hunger, fullness and nausea

In [48]:
# Annotated demographics for the same study, to compare against the abstract.
single_group.loc[single_group['pmcid'] == pmcid]

Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,abstract,in_abstract
11,healthy,_,participant_demographics,Jerome_Dockes,4075342,,12,7.0,38.0,5.0,23.0,60.0,,\n## Objectives \n \nPrevious fMRI studies h...,True
