# Endoscribe Pipeline: Colonoscopy Case Example

In [3]:
import pandas as pd

In [3]:
# Convert recordings to mono with the project's batch_convert helper.
# NOTE(review): both directories are absolute paths on one developer's machine,
# so this cell will not run elsewhere as written — consider a configurable base dir.
# NOTE(review): the paths point at recordings/eus/mass although this notebook is
# titled as a colonoscopy example — confirm this is the intended input.
from convert_to_mono import batch_convert

mono_conversion_kwargs = {
    "audio_dir": "/Users/emilyguan/Downloads/EndoScribe/recordings/eus/mass",
    "save_dir": "/Users/emilyguan/Downloads/EndoScribe/recordings/eus/mass_mono",
    "audio_format": "m4a",
}
batch_convert(**mono_conversion_kwargs)


Converted files, saved in /Users/emilyguan/Downloads/EndoScribe/recordings/eus/mass_mono


### Transcribe with Whisper

In [None]:
# Transcribe with Whisper
# Runs transcription/whisper_transcribe.py as a shell command (IPython `!` syntax)
# with the openai/whisper-large-v3 model for procedure type "col".
# The --save_filename value (demo_whisper_lg.csv) is the same file name the next
# cell reads from transcription/results/col/ — presumably the script writes
# there; confirm against the script.
# NOTE(review): --audio_dir is ../recordings/abstract, which is not the directory
# the mono-conversion cell saved into (recordings/eus/mass_mono) — confirm which
# recordings are meant to be transcribed.
!python transcription/whisper_transcribe.py \
    --procedure_type=col \
    --save_filename=demo_whisper_lg.csv \
    --model=openai/whisper-large-v3 \
    --audio_dir=../recordings/abstract

In [4]:
##* Colonoscopy transcript
# Build the path to the Whisper transcript CSV for this procedure type,
# load it into a DataFrame, and preview the first rows.
procedure_type, transcripts_fp = "col", "demo_whisper_lg.csv"
transcripts_full_fp = "/".join(
    ("transcription/results", procedure_type, transcripts_fp)
)
pred_transcripts = pd.read_csv(transcripts_full_fp)
pred_transcripts.head(n=5)

Unnamed: 0,file,pred_transcript
0,16,This is test 16. We have 65 year old female w...
1,114,"Hi, this is test 114. Today is today June 202..."
2,115,"Hi, good morning. This is test 115. Today is ..."
3,104,This is test 104. Now we have a 59 year old m...
4,11,"Hi, this is Test 11. Today is the 14th Septem..."


### Extract with LLM
Extract structured procedure details from the Whisper transcripts for two recordings (files 16 and 115).

In [None]:
#* Colonoscopy
# Runs main.py as a shell command (IPython `!` syntax) for the LLM extraction step.
# --transcripts_fp names the transcript CSV loaded earlier in this notebook
# (demo_whisper_lg.csv).
# --output_filename looks like a prefix: the next cell reads
# results/col/demo_llm_output_colonoscopies.csv and ..._polyps.csv — confirm
# against main.py.
# --files_to_process 16 115 presumably selects rows by the `file` column of the
# transcript CSV — confirm.
!python main.py \
--procedure_type=col \
--transcripts_fp=demo_whisper_lg.csv \
--output_filename=demo_llm_output \
--files_to_process 16 115

In [None]:
# Load and preview the LLM extraction results.
# Colonoscopy is a special case: besides the main "colonoscopies" results file
# there is a second results file holding the polyp findings. Both paths are
# defined here; the polyp file is read in a later cell.
# Relies on `procedure_type` set in the transcript-loading cell.
col_results_fp = "results/{}/demo_llm_output_colonoscopies.csv".format(procedure_type)
polyp_results_fp = "results/{}/demo_llm_output_polyps.csv".format(procedure_type)
col_results_df = pd.read_csv(col_results_fp)
col_results_df.head(n=5)

Unnamed: 0,id,attending,bbps_simple,bbps_right,bbps_transverse,bbps_left,bbps_total,extent,findings,polyp_count,impressions
0,16,S3_3__outputs,adequate,3,3,3,9,cecum,The colonoscope was advanced to the cecum. The...,8,['Multiple polyps identified and resected in t...
1,115,S3_3__outputs,adequate,2,3,3,8,cecum,The colonoscope was advanced to the cecum. Mul...,9,['Multiple colonic polyps (at least 9) identif...


In [6]:
# Load the polyp findings file (path defined in the previous cell) and
# preview the first rows.
polyp_results_df = pd.read_csv(filepath_or_buffer=polyp_results_fp)
polyp_results_df.head(n=5)

Unnamed: 0,col_id,size_min_mm,size_max_mm,location,resection_performed,resection_method,nice_class,jnet_class,paris_class
0,16,3.0,13.0,cecum,True,hot EMR,2,0,0-Is
1,16,3.0,3.0,cecum,True,cold snare,2,0,0-Is
2,16,3.0,3.0,cecum,True,cold snare,2,0,0-Is
3,16,3.0,3.0,cecum,True,cold snare,2,0,0-Is
4,16,3.0,3.0,cecum,True,cold snare,2,0,0-Is


### Draft Word (.docx) reports with the drafter

In [7]:
#* Colonoscopy - again, a special case: both the colonoscopy CSV and the polyp CSV must be specified.
# For other procedures, only the single CSV from LLM extraction is needed (e.g. for EUS, just the EUS CSV).
# Runs drafter.py as a shell command (IPython `!` syntax); the two CSV paths are
# the same files loaded in the results cells above.
# Per the captured output, one report per sample is written to
# <output_dir>/<sample>.docx (e.g. drafters/results/col/16.docx).
!python drafter.py \
--procedure=col \
--pred_csv=results/col/demo_llm_output_colonoscopies.csv \
--polyp_csv=results/col/demo_llm_output_polyps.csv \
--output_dir=drafters/results/col \
--samples_to_process 16 115

Creating colonoscopy report for '16'
Report for '16' created at drafters/results/col/16.docx
Creating colonoscopy report for '115'
Report for '115' created at drafters/results/col/115.docx
