In [1]:
import os
from google import genai
from google.genai import types
import pathlib
import wave

In [2]:
# Location of the PDF to narrate. Empty placeholder: point this at the paper
# before running the extraction cell.
FILEPATH = pathlib.Path("")

# Price per million tokens for audio generation, in dollars.
# Only used to compute the cost estimate that is compared against PRICE_CAP.
PPMT = 10

# Price cap for the audio generation, in dollars.
PRICE_CAP = 0.5

# Read the API key from the environment; a missing variable raises KeyError
# here, before any request is made.
gemini_api_key = os.environ["GEMINI_API_KEY"]

In [None]:
# Hand the key read in the config cell to the client explicitly, so the key
# that was checked there is the one actually used (rather than relying on the
# SDK to discover a key on its own).
client = genai.Client(api_key=gemini_api_key)

# FILEPATH defaults to an empty placeholder; fail with a clear message instead
# of an opaque directory/permission error from read_bytes().
if not FILEPATH.is_file():
    raise FileNotFoundError(
        f"FILEPATH must point to the PDF to narrate, got: {FILEPATH!s}"
    )

# Send the raw PDF bytes inline, followed by the extraction instructions.
prompt = "Print out all of the text in the paper that a narrator would read aloud. Do not include citation numbers. Do not include methods, acknowledgements, bibliography, reporting summary, or competing interests."
parsed_response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        types.Part.from_bytes(
            data=FILEPATH.read_bytes(),
            mime_type='application/pdf',
        ),
        prompt,
    ],
)

print(parsed_response.text)


In [None]:
# Check extracted text to make sure it looks ok.
# As the last expression of the cell, the string is shown as the cell output
# (its repr, so escape sequences such as \n stay visible).
parsed_response.text

In [19]:
# Set up the wave file to save the output:
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to a WAV file.

    Args:
        filename: Destination. A ``str`` path, any ``os.PathLike`` (for
            example ``pathlib.Path``), or an already-open binary file object.
        pcm: Raw PCM frame data as bytes.
        channels: Number of audio channels (default 1).
        rate: Frame rate in Hz (default 24000).
        sample_width: Bytes per sample (default 2).

    NOTE(review): the defaults presumably match the format of the audio
    returned by the TTS model -- confirm against the API documentation.
    """
    # wave.open() only opens the path itself when given a str; other objects
    # are treated as file objects. Convert path-like objects so that
    # pathlib.Path destinations work too.
    if isinstance(filename, os.PathLike):
        filename = os.fspath(filename)

    with wave.open(filename, "wb") as wf:
        # The header parameters must be set before any frames are written.
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)

In [None]:
# Ask the TTS model how many tokens the extracted text amounts to.
text_token_count = client.models.count_tokens(
    model="gemini-2.5-flash-preview-tts",
    contents=parsed_response.text,
)

# dollars = price-per-million-tokens * tokens / one million
# NOTE(review): this prices the *input text* tokens at PPMT. If audio output
# is billed on generated audio tokens, the real cost may differ -- confirm
# against the current pricing before trusting PRICE_CAP.
cost_estimate = PPMT * text_token_count.total_tokens / 1_000_000

print(text_token_count)
print(f'estimated cost = {cost_estimate}$')

In [None]:
# Only request speech synthesis when the estimate is strictly below the cap.
if cost_estimate < PRICE_CAP:
    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents="Read:"+parsed_response.text,
        config=types.GenerateContentConfig(
            # Ask for audio output, spoken with the prebuilt 'Kore' voice.
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        ),
    )
else:
    # Stop here instead of falling through silently: otherwise `response`
    # would be undefined on a fresh kernel (NameError in the next cell) or,
    # worse, still hold audio from an earlier run that would be saved as if
    # it were new.
    raise RuntimeError(
        f'estimated cost {cost_estimate}$ is not below PRICE_CAP '
        f'({PRICE_CAP}$); audio generation skipped'
    )

In [30]:
# Pull the inline audio bytes out of the first part of the first candidate.
data = (
    response
    .candidates[0]
    .content
    .parts[0]
    .inline_data
    .data
)

# Relative name, so the file is saved to the current directory.
# Change it here to write somewhere else.
file_name = 'saved_paper.wav'
wave_file(file_name, data)