In [19]:
# imports 
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableLambda, chain as as_runnable
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from typing import List, Optional
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
from langchain.output_parsers import PydanticOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain_community.retrievers import WikipediaRetriever,TavilySearchAPIRetriever
from langchain_core.tools import tool
import requests
from IPython.display import Markdown, display
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict
from langchain_core.messages import AnyMessage
from typing import Annotated, Sequence
import os
from crewai import Agent, Task, Crew, Process

In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv() 


True

In [63]:
# llm inference using groq api
# here we are using the mixtral 8x7b parameter model 
llm = ChatOpenAI(
    openai_api_base="https://api.groq.com/openai/v1", # https://api.openai.com/v1 or https://api.groq.com/openai/v1 
    openai_api_key= os.getenv("GROQ_API_KEY"), # os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY")
    model_name="mixtral-8x7b-32768",
    temperature=0
)

# claude 3 opus inference
from langchain_anthropic import ChatAnthropic
model = ChatAnthropic(model='claude-3-opus-20240229',anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"))


### `wikipedia content extraction`

In [23]:
template_wiki = """
Iam need to conduct a detailed research on a specific topic. Please identify and recommend some Wikipedia pages on closely related subjects. I'm looking for examples that provide insights into interesting aspects commonly associated with this topic, or examples that help me understand the typical content and structure included in Wikipedia pages for similar topics.
Please list closely related  subjects.
Topic of interest: {topic}

respond in the following format:\n
{format_instructions}
"""

## output parser and chain 
class Related_topics(BaseModel):
    topics : list[str] = Field(
        description="Comprehensive list of related subjects as background research."
    )
parser_rt = PydanticOutputParser(pydantic_object=Related_topics)
prompt_wiki = PromptTemplate(
    template=template_wiki,
    input_variables=['topic'],
    partial_variables= {'format_instructions': parser_rt.get_format_instructions()},
)

# formatter for wikipedia docs
def format_doc(doc, max_length=1000):

    """
    Format a document with its title, summary, and related categories.

    Args:
        doc (Document): The document object containing metadata and page content.
        max_length (int, optional): The maximum length of the formatted document.
                                    Defaults to 1000.

    Returns:
        str: The formatted document string containing the title, summary, and related categories.
    """

    related = "- ".join(doc.metadata["categories"])
    return f"### {doc.metadata['title']}\n\nSummary: {doc.page_content}\n\nRelated\n{related}"[
        :max_length
    ]


def format_docs(docs):

        """
    Format a list of documents with their titles, summaries, and related categories.

    Args:
        docs (list): A list of Document objects containing metadata and page content.

    Returns:
        str: A formatted string containing the titles, summaries, and related categories of the documents.
    """
        
# summarizer prompt 
gen_summary_prompt_wiki = ChatPromptTemplate.from_messages(
    [
        (
            "system",
    """now you have a variety of topics related to the user input and their respective Wikipedia pages , you're work is to conduct a detailed research on the given data and provide a study of the content realted the user provided topic.
       Wiki page outlines of related topics for inspiration:
        {wiki_data}   
    """
        ),
        (
            "user",
    """
    Topic of interest: {topic} 
    - conduct a detailed research on the provided data and give a detailed study \n
    """
        )
    ]
)

# gen_summary_prompt = ChatPromptTemplate.from_messages(
#     [
#         (
#             "system",
#             """
#             As an expert researcher and content creator, your task is to conduct a comprehensive study on the given topic using the provided Wikipedia page outlines as a starting point. Dive deep into the subject matter, analyzing the key concepts, historical context, and relevant examples. Your goal is to create a well-structured, informative, and engaging article that provides a thorough understanding of the topic to the readers.

#             To ensure the article is comprehensive and valuable, consider the following guidelines:
#             1. Identify the main themes and subtopics within the provided data and organize your research accordingly.
#             2. Provide a clear introduction that captures the reader's attention and sets the context for the article.
#             3. Elaborate on each subtopic, offering in-depth explanations, interesting facts, and relevant examples to support your points.
#             4. Use a logical flow of information, transitioning smoothly between paragraphs and sections.
#             5. Incorporate relevant quotes, statistics, or expert opinions to add credibility to your article.
#             6. Conclude the article with a summary of the key takeaways and a thought-provoking message or question to leave a lasting impression on the reader.

            
#             """
#         ),
#         (
#             "user",
#             """
#             Topic of interest: {topic}

#             Wiki page outlines of related topics for inspiration:
#             {wiki_data}
#             - Conduct a detailed research on the provided data and create a comprehensive article, following the guidelines mentioned above.
#             """
#         )
#     ]
# )
wikipedia_retriever = WikipediaRetriever(load_all_available_meta=True, top_k_results=1)
topic_generator = prompt_wiki |  llm | parser_rt 

@as_runnable
async def wiki_researcher(topic:str):
    topics = await topic_generator.ainvoke({"topic": topic})
    wiki_docs =await wikipedia_retriever.abatch(
        topics.topics,
        return_exceptions=True
    )
    docs =[]
    for i in wiki_docs:
        if isinstance(i, BaseException):
            continue
        docs.extend(i)
    formatted = format_docs(docs)
    wiki_research =  gen_summary_prompt_wiki | model
    
    return wiki_research.invoke({"topic": topic, "wiki_data": formatted})

### `Search engine results for related topics`

In [28]:


template = """System Message: You are a research assistant tasked with generating effective search queries for a given topic. Your goal is to provide a list of relevant and insightful search queries that will help the user explore the topic comprehensively from various angles.

Instructions:
1. Understand the topic of interest provided by the user.
2. Break down the topic into key concepts, aspects, and related terms.
3. Formulate search queries that cover different facets of the topic, such as:
   - Overview or introduction to the topic
   - Historical background or evolution
   - Current trends or developments
   - Practical applications or use cases
   - Challenges, controversies, or debates
   - Expert opinions, theories, or research
   - Statistics, data, or facts
   - Educational resources or tutorials

4. Ensure that the search queries are clear, concise, and specific to the topic.

5. Provide a numbered list of search queries, ensuring relevance and diversity.

6.just provide 10 most relevant search queries for the topic of interest.
Topic of interest: {topic}

respond in the following format:\n
{format_instructions}
"""
class RelatedSubjects(BaseModel):
    topics: List[str] = Field(
        description="Comprehensive list of search queries related to the topic of interest.",
    )
parser_topics = PydanticOutputParser(pydantic_object=RelatedSubjects)

prompt_topics = PromptTemplate(
    template=template,
    input_variables=["topic"],
    partial_variables= {'format_instructions':parser_topics.get_format_instructions()}
)
query_generation =  prompt_topics| llm | parser_topics

# web retrieval

def web_retrieval(topics: List[str]):

    """
    Retrieve information from the web for the given list of topics.

    Args:
        topics (List[str]): A list of topics for which information needs to be retrieved.

    Returns:
        List[Union[SearchResult, Exception]]: A list of search results or exceptions for each topic.
    """
        
    print("Retrieving information from the web...")
    print(len(topics))
    search = TavilySearchAPIRetriever(api_key=os.environ.get("TAVILY_API_KEY"),k=5)
    
    search_results =  search.batch(topics,return_exceptions=True)
    return search_results


def format_docs(docs):

    """
    Format multiple documents into citations and combined text.

    Args:
        docs (List[Document]): A list of Document objects containing metadata and page content.

    Returns:
        Tuple[List[str], str]: A tuple containing a list of citations and the combined text of all documents.
    """

    citations = [f"source\n{''.join(doc.metadata['source'])}" for doc in docs]
    combined_text = ""
    for doc in docs:
        combined_text += f"### {doc.metadata['title']}\n\n {doc.page_content}\n\nsource\n{''.join(doc.metadata['source'])}\n"
        return citations,combined_text
    
def format_search_results(docs,topics):
    output = ""
    citation = []
    for index,topic in enumerate(topics):
        output += f"## {index+1} {topic}\n\n"
        output += "-----\n\n"
        
        urls,content= format_docs(docs[index])
        output += content
        citation.extend(urls)
    return output,citation
gen_summary_prompt = ChatPromptTemplate.from_messages([
    ("system", """
        As an expert research analyst, your task is to create a comprehensive and well-structured article on the given topic using the provided search results. The article should be based solely on the information contained within the search results, without any additional external sources.

        Please follow these steps to generate the article:

        1. Analyze the search results thoroughly, identifying the most relevant and significant information related to the topic.

        2. Based on the content of the search results, determine appropriate subheadings that effectively organize the information and create a logical structure for the article.

        3. Begin with an introduction that clearly defines the topic and highlights its importance based on the insights gathered from the search results.

        4. Under each subheading, provide a detailed overview of the corresponding aspect of the topic. Ensure that the content is comprehensive and covers all the essential points mentioned in the search results.

        5. Use bullet points or numbered lists to present key findings, statistics, or examples within each subheading. However, accompany these points with detailed explanations and analyses to provide a thorough understanding of the topic.

        6. Maintain a logical flow throughout the article, ensuring smooth transitions between subheadings and a coherent narrative that ties all the information together.

        7. Conclude the article with a summary that highlights the main points covered and reinforces the significance of the topic based on the insights derived from the search results.

        8. Proofread and edit the article to ensure clarity, coherence, and adherence to grammatical and stylistic conventions.

        Remember, the article should be entirely based on the information provided in the search results. Do not introduce any new information or insights that are not directly supported by the given content.

        Search Results: {search_results}
    """),
    ("user", "Topic: {topic}\n\nPlease create a comprehensive article on the given topic using only the information provided in the search results, following the instructions outlined in the system message.")
])
web_search_summary = gen_summary_prompt | model  | StrOutputParser()



@as_runnable
def search_query_generation(topic: str):

    """
    Generate search queries based on the provided topic, retrieve information from the web,
    format the search results, and generate a summary.

    Args:
        topic (str): The topic of interest for generating search queries and retrieving information.

    Returns:
        Tuple[str, str]: A tuple containing the generated summary and citations of the search results.
    """

    queries = query_generation.invoke({"topic": topic})
    queries = queries.topics
    queries.append("2024 GTC conference by Nvidia highlights")
    docs = web_retrieval(queries)
    formatted_docs,citations = format_search_results(docs,queries)
    summary = web_search_summary.invoke({"topic":topic, "search_results":formatted_docs})
    return summary,citations


In [12]:
summary,citations = search_query_generation.invoke("Artificial general Intelligence")

Retrieving information from the web...
11


In [13]:
print(summary)

Artificial General Intelligence: The Future of AI

Introduction
Artificial General Intelligence (AGI) is a theorized AI system capable of rivaling human thinking and performing a wide range of intellectual tasks at a level comparable to humans. As the field of AI continues to advance rapidly, the concept of AGI has gained significant attention from researchers, industry leaders, and policymakers alike. This article explores the definition, history, current trends, challenges, and future implications of AGI.

Definition and Scope
AGI can be defined as a machine capable of understanding, learning, and applying knowledge across various domains, much like the human brain. Unlike narrow AI systems that excel at specific tasks, AGI would possess the ability to reason, plan, solve problems, think abstractly, and learn from experience in a generalized manner.

History and Evolution
The pursuit of AGI has been a long-standing goal in the field of artificial intelligence. Early research in the 1

### `NEWS API`

In [37]:

# Replace YOUR_API_KEY with your actual API key from NewsAPI
API_KEY = 'd632190e92a44c928333d631af6c56ca'

# Base URL for NewsAPI
base_url = 'https://newsapi.org/v2/everything'

def get_news(topic, num_articles=10):
    """
    Fetches news articles related to a given topic using the NewsAPI.

    Args:
        topic (str): The topic or keyword to search for news articles.
        num_articles (int, optional): The maximum number of articles to retrieve. Defaults to 10.

    Returns:
        list: A list of dictionaries containing the news article details.
    """
    # Query parameters
    params = {
        'q': topic,
        'apiKey': API_KEY,
        'pageSize': num_articles,
        'sortBy': 'relevancy'
    }

    # Make the API request
    response = requests.get(base_url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        # Get the articles from the response
        articles = response.json().get('articles')
        return articles
    else:
        print('Error:', response.status_code)
        return []


def display_news_articles(topic):
    """
    Displays the news articles in a formatted manner.

    Args:
        news_articles (list): A list of dictionaries containing the news article details.
    """
    topic = 'artificial general intelligence'
    num_articles = 40

    news_articles = get_news(topic, num_articles)
    with open('news_articles.txt', 'w', encoding='utf-8') as file:
        for article in news_articles:
            file.write(f"Title: {article['title']}\n")
            file.write(f"Source: {article['source']['name']}\n")
            file.write(f"Description: {article['description']}\n")
            file.write('-' * 50 + '\n')

In [53]:
def news_summary_generation(topic):
    """
    Generates a summary of news articles related to a given topic.

    Args:
        topic (str): The topic or keyword to search for news articles.

    Returns:
        str: A summary of the news articles.
    """
    display_news_articles(topic)
    gen_summary_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
                You are an expert analyst capable of generating detailed reports on various topics based on the provided news articles.
                Your task is to analyze the given news articles, extract relevant information related to the topic of interest, and generate a comprehensive report.
                The report should be well-structured, coherent, and provide insights and conclusions based on the information presented in the articles.
                """
            ),
            (
                "user",
                """
                Topic of interest: {topic}
                
                News articles:
                {news_articles}

                Please ensure that the report is well-organized, coherent, and provides a comprehensive understanding of the topic based on the given news articles.
                """
            )
        ]
    )


    gen_summary_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
                You are an expert analyst capable of generating detailed and comprehensive reports on various topics based on the provided news articles.
                Your task is to analyze the given news articles, extract relevant information related to the topic of interest, and generate an in-depth report.
                The report should be well-structured, coherent, and provide a thorough analysis and synthesis of the information presented in the articles.
                """
            ),
            (
                "user",
                """
                Topic of interest: {topic}
                
                News articles:
                {news_articles}
                
                Based on the provided news articles, generate a comprehensive report related to the topic of interest "{topic}". 
                The report should include the following:
                
                1. Introduction: Provide an overview of the topic and the purpose of the report. Briefly mention the news articles that will be analyzed.
                
                2. Article Analysis:
                For each news article, provide the following:
                a. Article Summary: Summarize the main points and key information presented in the article.
                b. Relevance to the Topic: Explain how the article relates to the topic of interest and its significance.
                c. Insights and Implications: Discuss any notable insights, trends, or implications derived from the article.
                
                3. Synthesis and Discussion:
                a. Common Themes: Identify and discuss any common themes, patterns, or trends that emerge across the articles.
                b. Conflicting Information: Highlight any conflicting information or differing perspectives presented in the articles, if applicable.
                c. Implications and Impact: Analyze the potential implications and impact of the information presented in the articles on the topic of interest.
                
                4. Conclusion:
                a. Summary: Provide a summary of the key points and main conclusions drawn from the analysis of the articles.
                b. Future Outlook: Discuss the potential future developments or areas for further exploration related to the topic based on the insights gained from the articles.
                c. Recommendations: Offer any recommendations or suggestions based on the analysis, if applicable.
                
                Please ensure that the report is comprehensive, well-structured, and provides a thorough analysis and synthesis of the information presented in the news articles. Use specific examples and references from the articles to support your points.
                """
            )
        ]
    )
    article_summary = gen_summary_prompt | llm | StrOutputParser()
    article_summary_claude = gen_summary_prompt | model | StrOutputParser()
    with open('news_articles.txt', 'r', encoding='utf-8') as file:
        file_content = file.read()
    summary_data =article_summary.invoke(
        {
            "topic": topic,
            "news_articles" :file_content
            
        }
    )
    summary_data_claude =article_summary_claude.invoke(
        {
            "topic":topic,
            "news_articles" :file_content
            
        }
    )
    return summary_data,summary_data_claude


### `Crew ai framework for podcast transcript generation`

In [64]:
async def podcaset_generation(topic:str):

   """
    Generate a podcast episode based on the given topic, utilizing information from Wikipedia, web search results, and news articles.

    Args:
        topic (str): The topic for which the podcast episode will be generated.

    Returns:
        dict: A dictionary containing the results of the podcast generation process.
   """

   wiki_summary = await wiki_researcher.ainvoke(topic)
   wiki_content = wiki_summary.content
   search_engine_content,citations = search_query_generation.invoke(topic)
   news1,news = news_summary_generation(topic)
   news = str(news)
   outline_creator = Agent(
      role='outline_creator',
      goal='Create a detailed outline for a podcast episode based on the given topic .',
      backstory='You are a skilled podcast planner with experience in creating engaging outlines.',
      llm=model,
      verbose=True,
      allow_delegation=False
   )

   outline_task = Task(
      description=f"""Create a  detailed outline for the podcast episode based on the  on the given topic
      - outline should detail the sections, subtopics, and estimated durations.
      Topic:\n### {topic} """,
      agent=outline_creator,
      expected_output=f"A detailed outline for a podcast episode on the topic of {topic}, with sections, subtopics, and estimated durations."
   )

   # Agent 2: Refined Outline Generation
   refined_outline_creator = Agent(
      role='refined_outline_creator',
      goal='Refine the podcast outline based on the provided content and the previously generated outline by the outline_creator agent.',
      backstory='You are an expert podcast planner with the ability to analyze content and improve outlines.',
      llm=model,
      verbose=True,
      allow_delegation=False
   )

   refined_outline_task = Task(
      description='Create a refined, detailed outline for the podcast episode based on the provided content and the previous outline:\n### Topic:\n' + topic +  '\n### Wikipedia Content:\n' + wiki_content + '\n### Search Engine Content:\n' + search_engine_content,
      agent=refined_outline_creator,
      expected_output=f"A refined and detailed outline for a podcast episode on the topic of {topic}, with sections, subtopics, estimated durations, and relevant details based on the provided content."
   )

   # Agent 3: Final Podcast Transcript Generation
   transcript_creator = Agent(
      role='transcript_creator',
      goal='Generate the transcript of a podcast episode based on the provided outline and content resources which should definitely have more than 40 interactions.',
      backstory='You are a talented podcast host and content creator with the ability to create engaging and informative transcripts.',
      llm=model,
      verbose=True,
      allow_delegation=False
   )

   transcript_task = Task(
      description='Generate the transcript of the podcast episode based on the provided outline and content resources :' + '\n### Wikipedia Content:\n' + wiki_content + '\n### Search Engine Content:\n' + search_engine_content + '\n### News Articles Content:\n' + news,
      agent=transcript_creator,
      expected_output="A conversational transcript for a podcast episode on the topic of {topic}, with an introduction, sections, subtopics, and a conclusion,facilitated through a dynamic dialogue between two hosts (Ryan and Vikram) with a proper completion of the episode."
   )
   crew = Crew(
      agents=[outline_creator , refined_outline_creator,transcript_creator],
      tasks=[outline_task, refined_outline_task, transcript_task],
      verbose=2,
      process= Process.sequential
   )
  
   result  = crew.kickoff(
   )
   
   return result

In [66]:
transcript = await podcaset_generation("Artificial General Intelligence")

Retrieving information from the web...
11


Overriding of current TracerProvider is not allowed


[DEBUG]: Working Agent: outline_creator
[INFO]: Starting Task: Create a  detailed outline for the podcast episode based on the  on the given topic
      - outline should detail the sections, subtopics, and estimated durations.
      Topic:
### Artificial General Intelligence 


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I now can give a great answer

Final Answer:

Podcast Outline: Artificial General Intelligence

1. Introduction (3-5 minutes)
   - Definition of Artificial General Intelligence (AGI)
   - Importance and potential impact of AGI
   - Brief overview of the current state of AI research

2. What is Artificial General Intelligence? (10-12 minutes)
   - Detailed explanation of AGI
   - Differences between AGI and narrow AI
   - Key characteristics of AGI (flexibility, adaptability, reasoning, learning)
   - Potential applications and benefits of AGI

3. Current Progress in AGI Research (15-20 minutes)
   - Overview of the main approaches to achievi

## `audio generation`

In [None]:
from pydub import AudioSegment
from io import BytesIO
from elevenlabs.client import ElevenLabs,Voice,VoiceSettings
import os

client = ElevenLabs(api_key='5c6b435d61cedf5be8f062321c2177f8')


def concatenate_audio(audio_parts):
    """
    Concatenate multiple audio parts into a single audio segment.

    Args:
        audio_parts (Iterable[Iterable[bytes]]): An iterable of iterable objects containing raw audio data.

    Returns:
        AudioSegment: The final concatenated audio segment.
    """
    # Concatenate all the audio parts into a single audio segment
    final_audio = AudioSegment.empty()
    for audio_part in audio_parts:
        empty_audio = AudioSegment.silent(duration=600)
        # Convert generator object to raw audio data
        audio_data = BytesIO()
        for chunk in audio_part:
            audio_data.write(chunk)
        audio_data.seek(0)
        # Load raw audio data into AudioSegment
        audio_segment = AudioSegment.from_file(audio_data)
        final_audio += audio_segment
        final_audio += empty_audio
    return final_audio


audio_parts = []

for sentence in transcript:
    
    # Determine the speaker based on the first word of the sentence
    speaker = sentence.split(':')[0]
    txt=sentence.split(': ')[-1]
    if speaker == "Ryan":
        aud = client.generate(text=txt, voice=  Voice(voice_id= 'od84OdVweqzO3t6kKlWT',settings=VoiceSettings(stability=1)))
    elif speaker == "Vikram":
        aud = client.generate(text=txt, voice=  Voice(voice_id= 'vry1FCKgiRNsZiXg56hV',settings=VoiceSettings(stability=1)))
    audio_parts.append(aud)


final_audio = concatenate_audio(audio_parts) # adding all the individual audio to get the entire podcast


script_directory = os.path.dirname(os.path.realpath(__file__))

# Specify the path for the final audio file in the same directory
final_audio_path = os.path.join(script_directory, "podcast.wav")

# Export the final audio to the specified path
final_audio.export(final_audio_path, format="wav")

print("Final audio downloaded successfully at:", final_audio_path)  


# Due to the unavailability of cloud instance access, the Large Language Model (LLM) inference was performed using the Groq API,
