In [1]:
from anthropic import Anthropic
import os

# Shared Anthropic API client; the key comes from the ANTHROPIC_API_KEY
# environment variable (None is passed through when the variable is unset).
client = Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))

In [2]:
def read_file_to_string(filename):
    """Read a text file and return its contents as a string.

    UTF-8 is tried first. If the bytes are not valid UTF-8 the file is re-read
    as Windows-1252 ('cp1252') and, as a last resort, as 'latin-1'.

    'latin-1' maps every possible byte to a character and therefore never
    raises UnicodeDecodeError, so it must be the final candidate. Trying it
    before 'cp1252' would make the 'cp1252' attempt unreachable and turn
    Windows-1252 punctuation (curly quotes, dashes) into control characters.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded file contents, or None if the file does not exist, cannot
        be read, or cannot be decoded. An explanatory message is printed in
        each failure case.
    """
    # Strictest encoding first, catch-all last.
    encodings = ['utf-8', 'cp1252', 'latin-1']
    try:
        for encoding in encodings:
            try:
                with open(filename, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                continue
            # Only announce the encoding when a fallback had to be used.
            if encoding != 'utf-8':
                print(f"Successfully read file using {encoding} encoding.")
            return content
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        return None
    except IOError:
        # Also covers OS-level failures that occur during a fallback attempt.
        print(f"Error: Could not read file '{filename}'.")
        return None

    # Defensive only: not expected to run while 'latin-1' is in the list.
    print(f"Error: Could not decode file '{filename}' with any common encoding.")
    return None

In [3]:
def generate_podcast_transcript_claude(
    system_prompt,
    input_prompt,
    model="claude-3-opus-20240229",
    max_tokens=1500,
    temperature=0.7,
):
    """Ask Claude to turn source text into a podcast transcript.

    Args:
        system_prompt: Instructions passed as the ``system`` argument.
        input_prompt: Source material sent as the single user message.
        model: Claude model identifier to use.
        max_tokens: Upper bound on generated tokens. Long transcripts are cut
            off at this limit; a warning is printed when that happens.
        temperature: Sampling temperature passed to the API.

    Returns:
        The text of the first content block of the reply with surrounding
        whitespace stripped, or None if the input is missing/blank or the
        request fails (a message is printed in both cases).
    """
    # Missing input (e.g. the source file could not be read) is rejected here
    # instead of being sent to the API.
    if input_prompt is None or (isinstance(input_prompt, str) and not input_prompt.strip()):
        print("An error occurred: input_prompt is empty or missing; nothing to send to the model.")
        return None

    try:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            system=system_prompt,
            messages=[
                {"role": "user", "content": input_prompt}
            ]
        )
        transcript_text = response.content[0].text.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

    # The API reports stop_reason == "max_tokens" when generation was cut off
    # by the token limit; surface that instead of silently returning a partial
    # transcript.
    if getattr(response, "stop_reason", None) == "max_tokens":
        print(
            f"Warning: the response reached the max_tokens limit ({max_tokens}); "
            "the transcript is probably truncated."
        )
    return transcript_text

In [4]:
# System prompt for the transcript generation call. It asks for a two-speaker
# podcast script and pins the output format (turns labelled "SPEAKER 1" /
# "SPEAKER 2", no stage directions) because the text is meant for text-to-speech.
system_prompt =  """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris. 

We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.

You have won multiple podcast awards for your writing.
 
Your job is to write word by word, even "umm, hmmm, right" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. 

Remember SPEAKER 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

SPEAKER 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

SPEAKER 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents SPEAKER 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the second speaker. 

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait

ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: 
ALWAYS CAPITALIZE SPEAKER FOR EITHER SPEAKER 1 OR SPEAKER 2
BOTH SPEAKER 1 AND SPEAKER 2 ARE ANONYMOUS FEMALES, THEY DO NOT NEED TO MENTION THEIR NAMES IN THE TRANSCRIPT
DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES AND NOT INCLUDE ANYTHING LIKE *laughs* OR *chuckling* OR ANY OTHER INTONATIONS BECAUSE EVERYTHING WILL BE TEXT-TO-SPEECH
ONLY INCLUDE THE TRANSCRIPT AND NO TEXT BEFORE OR AFTER THE PODCAST TRANSCRIPT
"""
# Source text that is sent as the user message. read_file_to_string returns
# None when the file cannot be read, so input_prompt may be None here.
input_prompt = read_file_to_string('clean_extracted_text.txt')

In [13]:
# Run the generation once and report the outcome.
transcript = generate_podcast_transcript_claude(system_prompt, input_prompt)

# A None or empty result means the request failed or returned no text.
if not transcript:
    print("Failed to generate transcript.")
else:
    print("Generated Podcast Transcript:")
    print(transcript)

Generated Podcast Transcript:
SPEAKER 1: Hey everyone, welcome to the Geometry Uncovered podcast! I'm your host and today we have a very special episode. We'll be diving deep into the fascinating world of automated geometry theorem proving with a focus on the groundbreaking work from DeepMind called AlphaGeometry. Joining me to break it all down is my co-host. How's it going?

SPEAKER 2: Hey! I'm doing great, thanks for having me. I'm super excited to discuss AlphaGeometry. From what I've heard, it sounds like an incredible advancement in the field. But I have to admit, a lot of this is new to me. Automated theorem proving? Geometry? It's been a while since high school math class! 

SPEAKER 1: No worries at all, that's why we're here - to break it down in an engaging and accessible way. At a high level, automated theorem proving is all about developing computer programs that can reason about mathematical statements and proofs. And geometry has been a major focus area, going back to the

### Write the contents of `transcript` to a file for further post-processing or text-to-speech (TTS)

In [14]:
# Save the transcript to a file.
# The check comes before open(): opening in 'w' mode truncates the file
# immediately, so writing a missing (None) transcript would first wipe any
# previously saved transcript and then fail with a TypeError.
if transcript:
    with open('raw_transcript.txt', 'w', encoding='utf-8') as file:
        file.write(transcript)
    print("Transcript has been saved to 'raw_transcript.txt'")
else:
    print("No transcript to save; 'raw_transcript.txt' was not written.")

Transcript has been saved to 'raw_transcript.txt'
