### AI-Powered Podcast Creation and Optimization Workflow


In [1]:
import os
from langgraph.graph import END, StateGraph
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from typing import TypedDict


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or, if you are working in a code base that has not been fully upgraded to pydantic 2 yet, with the v1 compatibility namespace: `from pydantic.v1 import BaseModel`

  from langgraph.serde.jsonplus import JsonPlusSerializer


In [6]:
# Define the state of the podcast at various stages
class PodcastState(TypedDict):
    """State dictionary threaded through the podcast workflow stages.

    Every entry is a message whose ``.content`` holds the text for that
    stage; each stage reads the previous stage's entry and writes its own.

    NOTE(review): the example usage in this notebook initialises the three
    derived fields to ``None``, which the ``BaseMessage`` annotations do not
    permit -- consider widening them to an optional type.
    """
    # Source text supplied by the caller; read by run_summarizer.
    main_text: BaseMessage
    # Written by run_summarizer; read by run_scriptwriter.
    key_points: BaseMessage
    # Written by run_scriptwriter; read by run_enhancer.
    script_essence: BaseMessage
    # Written by run_enhancer; the final dialogue script.
    enhanced_script: BaseMessage


# Define the podcast workflow
class PodcastCreationWorkflow:
    """Three-stage pipeline that turns source text into a podcast script.

    The stages are summarizer -> scriptwriter -> enhancer. Each stage reads
    one entry of a ``PodcastState``, sends it to an Ollama model together
    with a system prompt loaded from disk, and stores the stripped model
    response back into the state as a ``HumanMessage``.
    """

    def __init__(self, timestamp=None, api_base="http://localhost:11434", api_key=None):
        """Create the per-stage models and load the three system prompts.

        Args:
            timestamp: Optional suffix selecting a historical prompt version
                (see ``load_prompt``).
            api_base: Base URL of the Ollama server.
            api_key: Stored on the instance only; it is not passed to the
                Ollama models created here.
        """
        self.api_base = api_base
        self.api_key = api_key
        self.model = "llama3"  # Ollama model name
        # Summarizing and script writing run at temperature 0; only the
        # enhancer stage uses a higher temperature.
        self.summarizer_model = self._create_ollama_model(temperature=0)
        self.scriptwriter_model = self._create_ollama_model(temperature=0)
        self.enhancer_model = self._create_ollama_model(temperature=0.7)
        self.timestamp = timestamp

        # Load system prompts
        self.summarizer_system_prompt = self.load_prompt("prompts/summarizer_prompt.txt", self.timestamp)
        self.scriptwriter_system_prompt = self.load_prompt("prompts/scriptwriter_prompt.txt", self.timestamp)
        self.enhancer_system_prompt = self.load_prompt("prompts/enhancer_prompt.txt", self.timestamp)

    def _create_ollama_model(self, temperature):
        """Build an ``OllamaLLM`` for ``self.model`` at the given temperature."""
        return OllamaLLM(
            base_url=self.api_base,
            model=self.model,
            temperature=temperature
        )

    @staticmethod
    def load_prompt(file_path, timestamp=None):
        """Read a prompt file, preferring a timestamped historical copy.

        When ``timestamp`` is truthy, the file
        ``<parent of this module's directory>/prompt_history/<basename>_<timestamp>``
        is used if it exists. Otherwise (or when no such file exists)
        ``file_path`` itself is opened as given.

        Args:
            file_path: Path of the default prompt file.
            timestamp: Optional suffix identifying a historical version.

        Returns:
            The file contents with surrounding whitespace stripped.

        Raises:
            OSError: If the file that ends up being opened cannot be read.
        """
        try:
            # Try to get the current directory using __file__
            current_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # If __file__ is not defined (e.g., in interactive environments)
            current_dir = os.getcwd()

        if timestamp:
            history_path = os.path.join(
                os.path.dirname(current_dir),
                "prompt_history",
                f"{os.path.basename(file_path)}_{timestamp}",
            )
            if os.path.exists(history_path):
                with open(history_path, 'r', encoding='utf-8') as file:
                    return file.read().strip()

        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read().strip()

    @staticmethod
    def _message_content(state, key):
        """Return the text stored under ``key``, or "" if the entry is missing or None."""
        message = state.get(key)
        return message.content if message is not None else ""

    @staticmethod
    def _generate(model, system_prompt, variable, value):
        """Send ``value`` to ``model`` under ``system_prompt`` and return the stripped reply.

        The human message is the placeholder ``{variable}`` filled in at
        invoke time, so the value is inserted verbatim. Interpolating the
        value into the template string itself would make any ``{``/``}`` in
        the text be parsed as template variables.
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", "{" + variable + "}"),
        ])
        chain = prompt | model
        return chain.invoke({variable: value}).strip()

    def run_summarizer(self, state: PodcastState) -> PodcastState:
        """Extract key points from ``state["main_text"]`` into ``state["key_points"]``.

        Raises:
            ValueError: If the main text is missing or empty.
        """
        text = self._message_content(state, "main_text")

        if not text:
            raise ValueError("The main_text content is empty.")

        print("Summarizing the text to extract key points...")
        key_points = self._generate(
            self.summarizer_model, self.summarizer_system_prompt, "text", text
        )

        state["key_points"] = HumanMessage(content=key_points)
        return state

    def run_scriptwriter(self, state: PodcastState) -> PodcastState:
        """Turn ``state["key_points"]`` into ``state["script_essence"]``.

        Raises:
            ValueError: If the key points are missing or empty.
        """
        key_points = self._message_content(state, "key_points")

        if not key_points:
            raise ValueError("No key points found to generate the script.")

        print("Generating script essence from key points...")
        script_essence = self._generate(
            self.scriptwriter_model, self.scriptwriter_system_prompt, "key_points", key_points
        )

        state["script_essence"] = HumanMessage(content=script_essence)
        return state

    def run_enhancer(self, state: PodcastState) -> PodcastState:
        """Rewrite ``state["script_essence"]`` into ``state["enhanced_script"]``.

        Raises:
            ValueError: If the script essence is missing or empty.
        """
        script_essence = self._message_content(state, "script_essence")

        if not script_essence:
            raise ValueError("No script essence found to enhance.")

        print("Enhancing script with dialogue and banter...")
        enhanced_script = self._generate(
            self.enhancer_model, self.enhancer_system_prompt, "script_essence", script_essence
        )

        state["enhanced_script"] = HumanMessage(content=enhanced_script)
        return state

    def create_workflow(self) -> StateGraph:
        """Build the summarizer -> scriptwriter -> enhancer graph.

        Returns:
            The ``StateGraph`` with nodes and edges added; it is returned
            as built here, without being compiled.
        """
        workflow = StateGraph(PodcastState)
        # Register the nodes before anything refers to them by name.
        workflow.add_node("summarizer", self.run_summarizer)
        workflow.add_node("scriptwriter", self.run_scriptwriter)
        workflow.add_node("enhancer", self.run_enhancer)
        workflow.set_entry_point("summarizer")
        workflow.add_edge("summarizer", "scriptwriter")
        workflow.add_edge("scriptwriter", "enhancer")
        workflow.add_edge("enhancer", END)

        return workflow

In [7]:
# Load the raw source text that the workflow will turn into a podcast
file_path = "sample_podcast.txt"  # Relative to the working directory; point this elsewhere if the file lives in another folder

with open(file_path, mode='r', encoding='utf-8') as source_file:
    podcast_content = source_file.read()

In [8]:
# Notebook display: echo the loaded text to confirm the file was read
podcast_content

"Welcome to our podcast, where we dive into the latest advancements in artificial intelligence and machine learning. Today, we're discussing how AI is transforming industries like healthcare, finance, and education.\n\nIn healthcare, AI-powered systems are improving diagnostic accuracy, helping doctors make better decisions. In finance, algorithms are being used to detect fraudulent transactions and manage investments more efficiently. Meanwhile, in education, AI is revolutionizing personalized learning by adapting to each student's needs.\n\nLater in the show, we'll talk to experts in the field and explore some of the challenges and ethical concerns surrounding the rapid development of AI. Stay tuned for a deep dive into the exciting world of AI and machine learning.\n"

In [11]:
# Build the workflow against the local Ollama server
workflow = PodcastCreationWorkflow(api_base="http://localhost:11434")

# Seed the state with the source text; the derived fields start out empty
state = PodcastState(
    main_text=HumanMessage(content=podcast_content),
    key_points=None,
    script_essence=None,
    enhanced_script=None,
)

# Run the three stages in pipeline order, threading the state through each
for stage in (
    workflow.run_summarizer,
    workflow.run_scriptwriter,
    workflow.run_enhancer,
):
    state = stage(state)

# Print the section header and keep the final script text for later cells
print("Enhanced Script:")
dialogue_text = state["enhanced_script"].content

Summarizing the text to extract key points...
Generating script essence from key points...
Enhancing script with dialogue and banter...
Enhanced Script:


In [13]:
# Show the enhanced script produced by the workflow
print(dialogue_text)

Here's the rewritten dialogue:

Host: Welcome to today's episode on the transformative power of artificial intelligence. Imagine a world where medical diagnoses are more accurate, financial transactions are secure, and personalized learning is tailored to each student's needs.

Guest: That sounds like a utopian future! But how exactly is AI achieving this?

Host: Well, in healthcare, AI algorithms analyze vast amounts of data to help doctors make better decisions. It's like having a superpowered assistant that can sift through medical records, identify patterns, and provide insights that human doctors might miss.

Guest: That's incredible. What about finance? How does AI impact the way we manage our money?

Host: In finance, AI algorithms detect fraudulent transactions and manage investments efficiently. It's like having a hawk-eyed accountant that can spot suspicious activity and make smart investment decisions in real-time.

Guest: Wow, I never thought about it that way. And what abo

### Text to Speech 

In [21]:
import torch
from transformers import AutoProcessor, AutoModel
import scipy.io.wavfile as wavfile
import numpy as np
from IPython.display import Audio
import io

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the Bark processor and model, then place the model on that device
processor = AutoProcessor.from_pretrained("suno/bark")
model = AutoModel.from_pretrained("suno/bark")
model = model.to(device)

pytorch_model.bin:   4%|3         | 157M/4.49G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/repos/76/1e/761e5da7cc6a43df30e6091397aa6748ca4c7558722f8f960f355f87a51b4750/4e3d407b9b3b619da184c85786c88e5e35f90f9089303e16db696ed0be477989?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model.bin%3B+filename%3D%22pytorch_model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1729087601&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyOTA4NzYwMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy83Ni8xZS83NjFlNWRhN2NjNmE0M2RmMzBlNjA5MTM5N2FhNjc0OGNhNGM3NTU4NzIyZjhmOTYwZjM1NWY4N2E1MWI0NzUwLzRlM2Q0MDdiOWIzYjYxOWRhMTg0Yzg1Nzg2Yzg4ZTVlMzVmOTBmOTA4OTMwM2UxNmRiNjk2ZWQwYmU0Nzc5ODk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qJnJlc3BvbnNlLWNvbnRlbnQtdHlwZT0qIn1dfQ__&Signature=c29%7ELVcsFls1BwA43NSfSEHnbLP7JwPpZKTCWz7HJUGetjco3uOHRD7pf-t7kHsq0BwBtTv2qoGoMbU5WKOsA-cuKyzGr5z-LIidMb-CFoHoodNHEMn1RfDkLVxpDd-BbRluCjwbjD4mjUp8QoDlpFEN7o2ECvwBd4tfTLfV4GRSfFIrrbHQPehU

pytorch_model.bin:  19%|#8        | 849M/4.49G [00:00<?, ?B/s]

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)


generation_config.json:   0%|          | 0.00/4.91k [00:00<?, ?B/s]

In [22]:
def generate_audio(text, voice_preset="v2/en_speaker_6"):
    """
    Synthesize speech for one piece of text with the loaded Bark model.

    Relies on the module-level ``processor``, ``model`` and ``device``.

    Args:
    text (str): The text to speak.
    voice_preset (str): Name of the voice preset passed to the processor.

    Returns:
    numpy.ndarray: The model output moved to the CPU, converted to a numpy
    array and squeezed.
    """
    # Prepare the model inputs and move them to the device the model is on
    model_inputs = processor(text, voice_preset=voice_preset).to(device)
    waveform = model.generate(**model_inputs)
    return waveform.cpu().numpy().squeeze()


In [23]:
def create_podcast_audio(dialogue_text):
    """
    Creates an audio podcast from the given dialogue text using Bark TTS.

    The text is processed line by line. A line is spoken only when it has
    the form ``Speaker: text`` with non-empty text after the first colon;
    lines labelled ``Host`` use one voice preset and every other speaker
    uses a second one. The clips are concatenated, peak-normalised to the
    16-bit range and encoded as a 24 kHz WAV file.

    Args:
    dialogue_text (str): The dialogue text to convert to audio.

    Returns:
    bytes: The generated podcast audio as WAV-encoded bytes.

    Raises:
    ValueError: If no line contains both a speaker label and spoken text.
    """
    sample_rate = 24000
    audio_pieces = []

    for piece in dialogue_text.split('\n'):
        speaker, separator, text = piece.partition(':')
        text = text.strip()
        # Skip blank lines, lines without a speaker label, and label-only
        # lines such as "Here's the rewritten dialogue:" -- sending empty
        # text to the TTS model would only add spurious audio.
        if not separator or not text:
            continue
        voice_preset = "v2/en_speaker_6" if speaker.strip() == "Host" else "v2/en_speaker_9"
        audio_pieces.append(generate_audio(text, voice_preset))

    if not audio_pieces:
        # Fail with an explicit message instead of numpy's generic
        # "need at least one array to concatenate".
        raise ValueError("No 'Speaker: text' dialogue lines found to convert to audio.")

    # Concatenate all audio pieces
    combined_audio = np.concatenate(audio_pieces)

    # Normalize to the int16 range; a fully silent signal has a zero peak
    # and is left unscaled to avoid dividing by zero.
    peak = np.max(np.abs(combined_audio))
    if peak > 0:
        combined_audio = combined_audio / peak * 32767
    combined_audio = np.int16(combined_audio)

    # Encode as WAV into an in-memory buffer
    buffer = io.BytesIO()
    wavfile.write(buffer, sample_rate, combined_audio)
    buffer.seek(0)

    return buffer.getvalue()

In [None]:
# Synthesize the whole episode from the enhanced script
podcast_audio = create_podcast_audio(dialogue_text)

# Also keep a copy of the WAV bytes on disk (optional)
with open("podcast_output.wav", mode="wb") as wav_file:
    wav_file.write(podcast_audio)
print("Podcast audio saved as podcast_output.wav")

In [None]:
# Display the audio in the notebook
# NOTE(review): podcast_audio is already WAV-encoded bytes (written with
# wavfile.write at 24000 Hz in create_podcast_audio), so `rate` may be
# redundant here -- confirm against the IPython.display.Audio documentation.
Audio(podcast_audio, rate=24000)