# GABRIEL Library Stress Test

This notebook exercises various API calls and pipelines in dummy mode to verify the library works as expected. All OpenAI calls use the built-in dummy responses, so it can run offline and be re-run safely.

In [None]:

import os, sys, shutil, asyncio, pandas as pd
sys.path.insert(0, os.path.abspath('../src'))
from gabriel.utils import openai_utils
from gabriel.tasks import (
    Ratings, RatingsConfig,
    BasicClassifier, BasicClassifierConfig,
    Deidentifier, DeidentifyConfig,
    Regional, RegionalConfig,
    CountyCounter,
    EloRater, EloConfig,
    RecursiveEloConfig, RecursiveEloRater,
)
from gabriel.utils import Teleprompter, PromptParaphraser, PromptParaphraserConfig

# Scratch directory that receives every file written by the cells below.
out_dir = 'stress_test_outputs'

# Remove whatever a previous run left behind, then recreate the directory
# empty, so each run of the notebook starts from a clean slate.
if os.path.exists(out_dir):
    shutil.rmtree(out_dir)
os.makedirs(out_dir, exist_ok=True)


In [None]:

# Start from a tiny built-in corpus; it is replaced below only when a locally
# cached copy of the 20 Newsgroups "sci.space" data can be read.
sample_texts = ['Space exploration','Galaxy news','Astronomy facts']
try:
    from sklearn.datasets import fetch_20newsgroups
    ng = fetch_20newsgroups(
        subset='train',
        categories=['sci.space'],
        remove=('headers','footers','quotes'),
        download_if_missing=False,  # use the local cache only, never download
    )
    sample_texts = ng.data[:3]
except Exception:
    # Deliberate best-effort: if scikit-learn is missing or the dataset is not
    # cached, keep the built-in fallback texts assigned above.
    pass


In [None]:

ng_df = await openai_utils.get_all_responses(prompts=sample_texts, identifiers=['ng1','ng2','ng3'], use_dummy=True, save_path=os.path.join(out_dir,'ng.csv'))
ng_df


## Basic `get_all_responses`

In [None]:

prompts = ['Hello world', 'How are you?']
df = await openai_utils.get_all_responses(prompts=prompts, identifiers=['p1','p2'], use_dummy=True, save_path=os.path.join(out_dir,'basic.csv'))
df


## JSON mode

In [None]:

json_prompts = ['{"a":1}', '{"b":2}']
schema = {"type": "object"}
json_df = await openai_utils.get_all_responses(prompts=json_prompts, json_mode=True, expected_schema=schema, use_dummy=True, save_path=os.path.join(out_dir,'json.csv'))
json_df


## Web search tool usage

In [None]:

search_prompts = ['What is the capital of France?']
web_df = await openai_utils.get_all_responses(prompts=search_prompts, identifiers=['search'], use_web_search=True, use_dummy=True, save_path=os.path.join(out_dir,'web.csv'))
web_df


## Resume from existing results

In [None]:

resume_prompts = ['A1','A2','A3']
# First run with only two prompts
_ = await openai_utils.get_all_responses(prompts=resume_prompts[:2], identifiers=['r1','r2'], use_dummy=True, save_path=os.path.join(out_dir,'resume.csv'))
# Second run with all prompts (should only process missing one)
resume_df = await openai_utils.get_all_responses(prompts=resume_prompts, identifiers=['r1','r2','r3'], use_dummy=True, save_path=os.path.join(out_dir,'resume.csv'))
resume_df


## Ratings pipeline

In [None]:

data = pd.DataFrame({'text': ['This product is great.', 'Terrible experience.']})
ratings_cfg = RatingsConfig(attributes={'quality':'Overall quality'}, save_dir=os.path.join(out_dir,'ratings'), use_dummy=True)
ratings_res = await Ratings(ratings_cfg).run(data, text_column='text')
ratings_res


## BasicClassifier pipeline

In [None]:

clf_data = pd.DataFrame({'txt': ['I love pizza', 'I hate spinach']})
clf_cfg = BasicClassifierConfig(labels={'positive':'Is the sentiment positive?'}, save_dir=os.path.join(out_dir,'classifier'), use_dummy=True)
clf_res = await BasicClassifier(clf_cfg).run(clf_data, text_column='txt')
clf_res


## Deidentifier pipeline

In [None]:

deid_data = pd.DataFrame({'text':['John Doe went to New York.']})
deid_cfg = DeidentifyConfig(save_path=os.path.join(out_dir,'deid.csv'), use_dummy=True)
deid_res = await Deidentifier(deid_cfg).run(deid_data, text_column='text')
deid_res


## Regional analysis pipeline

In [None]:

reg_data = pd.DataFrame({'county':['A','B']})
reg_cfg = RegionalConfig(save_dir=os.path.join(out_dir,'regional'), use_dummy=True)
regional_task = Regional(reg_data, 'county', topics=['economy'], cfg=reg_cfg)
regional_res = await regional_task.run()
regional_res


## CountyCounter pipeline

In [None]:

county_data = pd.DataFrame({'county':['A','B'], 'fips':['00001','00002']})
cc = CountyCounter(county_data, county_col='county', topics=['econ'], fips_col='fips', save_dir=os.path.join(out_dir,'county'), use_dummy=True, n_elo_rounds=1)
county_res = await cc.run()
county_res


## EloRater pipeline

In [None]:

elo_data = pd.DataFrame({'identifier':['x','y'], 'text':['Text X','Text Y']})
tele = Teleprompter()
elo_cfg = EloConfig(attributes={'clarity':''}, n_rounds=1, save_dir=os.path.join(out_dir,'elo'), use_dummy=True)
elo_task = EloRater(tele, elo_cfg)
elo_res = await elo_task.run(elo_data, text_col='text', id_col='identifier')
elo_res


## RecursiveEloRater pipeline

In [None]:

rec_data = pd.DataFrame({'identifier':['a','b','c'], 'text':['Alpha','Bravo','Charlie']})
base_cfg = EloConfig(attributes={'score':''}, n_rounds=1, save_dir=os.path.join(out_dir,'rec'), use_dummy=True)
rec_cfg = RecursiveEloConfig(base_cfg=base_cfg, min_remaining=2)
rec_task = RecursiveEloRater(tele, rec_cfg)
rec_res = await rec_task.run(rec_data, text_col='text', id_col='identifier')
rec_res


In [None]:

from gabriel.utils.parsing import safest_json
from unittest.mock import patch

async def fake_response(*args, **kwargs):
    """Stub used in place of ``get_response`` for the JSON-repair check.

    Accepts and ignores any positional or keyword arguments and always
    returns the same 2-tuple: a one-element list holding a valid JSON
    string, and ``0.0``.
    """
    canned_texts = ['{"good": true}']
    canned_number = 0.0
    return canned_texts, canned_number

with patch('gabriel.utils.openai_utils.get_response', fake_response):
    fixed_json = await safest_json('{bad:1}')
fixed_json


## Batch mode and PromptParaphraser

In [None]:

batch_prompts = [f'B{i}' for i in range(4)]
batch_df = await openai_utils.get_all_responses(
    prompts=batch_prompts,
    identifiers=[f'b{i}' for i in range(4)],
    use_batch=True,
    use_dummy=True,
    save_path=os.path.join(out_dir,'batch.csv'),
)
batch_df


In [None]:

parap_cfg = PromptParaphraserConfig(n_variants=2, save_dir=os.path.join(out_dir,'parap'), use_dummy=True)
parap = PromptParaphraser(parap_cfg)
parap_res = await parap.run(Ratings, ratings_cfg, data, text_column='text')
parap_res
