In [12]:
import openai
from openai import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.schema import HumanMessage
from langchain.agents import AgentType, initialize_agent, load_tools

from langchain.tools import Tool, DuckDuckGoSearchRun, DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.tools.google_scholar import GoogleScholarQueryRun
from langchain.utilities import GoogleSearchAPIWrapper, WikipediaAPIWrapper, ArxivAPIWrapper, WolframAlphaAPIWrapper, GoogleScholarAPIWrapper

from dotenv import load_dotenv
import json
import os
import time

print(openai.__version__)

# Load environment variables from .env file
load_dotenv(dotenv_path='completion_gpt4.env')

## https://python.langchain.com/docs/integrations/tools

1.3.2


True

In [13]:
blog_topic = "How does LLMs work? - A beginners guide (5 part series)" 

In [14]:
# Create an instance of the AzureChatOpenAI class using Azure OpenAI
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_ENDPOINT"),
    deployment_name=os.getenv("OPENAI_DEPLOYMENT_NAME"),
    temperature=0.7)

In [15]:
initial_prompt = """
"You are a Internet blog writer. You are going to write a 5 part blog series.
Analyse the topic and give a list of 4 subtopics on which we can write blogs alongwith the main topic. Total 5 blogs.
We want the blogs to have a mix of 60% technical content (high level maths, understanding, research) and 40% content for easy understanding.
Topic for the blog series is :
"""

initial_prompt = initial_prompt + blog_topic + "Give the output in a List of lists where every row potrays a single blog and cells inside each row represents subtopics"
print(initial_prompt)


"You are a Internet blog writer. You are going to write a 5 part blog series.
Analyse the topic and give a list of 4 subtopics on which we can write blogs alongwith the main topic. Total 5 blogs.
We want the blogs to have a mix of 60% technical content (high level maths, understanding, research) and 40% content for easy understanding.
Topic for the blog series is :
How does LLMs work? - A beginners guide (5 part series)Give the output in a List of lists where every row potrays a single blog and cells inside each row represents subtopics


In [16]:
user_prompt = """
You are a blog writer. You follow following details.
- Writing Style: The writing style should strike a balance between informative and engaging. Incorporate a conversational tone to keep readers captivated, ensuring that complex concepts are explained in a user-friendly manner. Use analogies and real-world examples to elucidate intricate quantum principles.
- HTML Formatting: The output should be in HTML format for seamless integration into web platforms. Ensure that the HTML is well-structured and includes appropriate tags for headings, paragraphs, and other elements. Use inline styles or refer to external stylesheets for a visually appealing presentation. Use proper formatting for code.
-Images: Instead of including images directly, leave text prompts at strategic points for a diffusional model to generate relevant visuals. Describe the scenes or concepts you envision in a way that guides the creation of meaningful and contextually appropriate images.
- Diagrams and Flowcharts: Represent any complex processes or quantum algorithms using mermaid.js syntax. This will provide a visually appealing representation of the information, enhancing the reader's understanding. Clearly label components and use appropriate connectors.
- Mathematical Equations: Wherever necessary, incorporate mathematical equations to explain quantum principles. Use LaTeX syntax for mathematical notation to ensure precision and clarity. Provide step-by-step explanations alongside equations to aid comprehension.
- References: Include references to relevant sources to support your claims and provide additional information. Use hyperlinks to direct readers to external resources. Ensure that the references are credible and authoritative.

Finally - Output the complete HTML content. Since it can be text for a subtopic, so dont start from html tags. Start from <h1> and end with <p> tags.
Write in very detail - Exlain everything in detail. We need to fill pages.

The topic/subtopic is something like this:
"""

In [25]:
# Testing chat llm  
res = llm([HumanMessage(content=initial_prompt)])

In [30]:
topics_list = json.loads(res.content.replace("\n",""))
topics_list

[['Blog 1: Introduction to LLMs',
  '1.1 What are LLMs?',
  '1.2 Why are LLMs important in natural language processing?',
  '1.3 Brief history and development of LLMs',
  '1.4 Key components of LLMs'],
 ['Blog 2: Understanding the architecture of LLMs',
  '2.1 Basic structure and components of LLMs',
  '2.2 Input and output layers in LLMs',
  '2.3 Hidden layers and their role in LLMs',
  '2.4 Activation functions used in LLMs'],
 ['Blog 3: Training LLMs',
  '3.1 Data preparation for training LLMs',
  '3.2 Supervised learning and labeled data',
  '3.3 Loss functions and optimization algorithms in LLM training',
  '3.4 Backpropagation and gradient descent in training LLMs'],
 ['Blog 4: Improving LLM performance',
  '4.1 Regularization techniques for preventing overfitting',
  '4.2 Hyperparameter tuning in LLMs',
  '4.3 Transfer learning and pre-trained LLM models',
  '4.4 Handling bias and fairness in LLMs'],
 ['Blog 5: Applications and future of LLMs',
  '5.1 LLMs in natural language un

In [17]:
first_pass_result = [['Blog 1: Introduction to LLMs',
  '1.1 What are LLMs?',
  '1.2 Why are LLMs important in natural language processing?',
  '1.3 Brief history and development of LLMs',
  '1.4 Key components of LLMs'],
 ['Blog 2: Understanding the architecture of LLMs',
  '2.1 Basic structure and components of LLMs',
  '2.2 Input and output layers in LLMs',
  '2.3 Hidden layers and their role in LLMs',
  '2.4 Activation functions used in LLMs'],
 ['Blog 3: Training LLMs',
  '3.1 Data preparation for training LLMs',
  '3.2 Supervised learning and labeled data',
  '3.3 Loss functions and optimization algorithms in LLM training',
  '3.4 Backpropagation and gradient descent in training LLMs'],
 ['Blog 4: Improving LLM performance',
  '4.1 Regularization techniques for preventing overfitting',
  '4.2 Hyperparameter tuning in LLMs',
  '4.3 Transfer learning and pre-trained LLM models',
  '4.4 Handling bias and fairness in LLMs'],
 ['Blog 5: Applications and future of LLMs',
  '5.1 LLMs in natural language understanding and generation',
  '5.2 Sentiment analysis and text classification using LLMs',
  '5.3 LLMs in machine translation and language modeling',
  '5.4 Ethical considerations and challenges in LLM applications']]

topics_list = first_pass_result

In [26]:
len(topics_list)

5

In [19]:
# Create an empty list to store the results
results = []

# Iterate over the topics
for topic in topics_list[:1]:
    # Create an empty string to store the generated text for this topic
    topic_text = ""

    # Iterate over the subtopics in this topic
    for subtopic in topic:
        print("Generating text for topic: ", subtopic)
        user_prompt_ = user_prompt + subtopic
        # Generate text for the subtopic
        res = llm([HumanMessage(content=user_prompt_)], max_tokens=16000)
        
        # Add the generated text to the topic text
        topic_text += res.content.replace("\n","")
        time.sleep(1)
    
    # Append the topic text to the results list
    results.append(topic_text)

Generating text for topic:  Blog 1: Introduction to LLMs
Generating text for topic:  1.1 What are LLMs?
Generating text for topic:  1.2 Why are LLMs important in natural language processing?
Generating text for topic:  1.3 Brief history and development of LLMs
Generating text for topic:  1.4 Key components of LLMs


In [23]:
response = "<html><body>"+ results[0]+ "</body></html>"

In [24]:

## Write this response in a file saving in blogs folder giving it a title as blog1.html
with open('blogs/blog1.html', 'w') as f:
    f.write(response)

In [17]:

google_search = GoogleSearchAPIWrapper()
google_scholar_search = GoogleScholarAPIWrapper()
wiki_search = WikipediaAPIWrapper()
wolf_search = WolframAlphaAPIWrapper()
arxiv_search = ArxivAPIWrapper()

tool_names = [
    Tool(
        name="Wolfram Alpha",
        description="Ask Wolfram Alpha anything.",
        func=wolf_search.run
    ),
    # Tool(
    #     name="Arxiv",
    #     description="Search Arxiv for papers.",
    #     func=arxiv_search.run
    # ),
    # Tool(
    #     name="Google Search", 
    #     description="Search Google and summarise the takeaway from the first five results.", 
    #     func=google_search.run
    # ),
    # Tool(
    #     name="Google Scholar Search", 
    #     description="Search Google Scholar for research papers and summarise the research in first five papers highlighting the papers.", 
    #     func=google_scholar_search.run
    # ),
    Tool(
        name="Wikipedia Search", 
        description="Search Wikipedia for detailed summary.", 
        func=wiki_search.run
    ), 
]

agent_chain = initialize_agent(
    tool_names,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [13]:
final_res = []

In [18]:
row = topics_list[0]
topic = " ".join(row)
user_prompt_ = user_prompt + topic
response = llm.run(user_prompt_)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis is a task-based question where I need to generate a blog on the topic "Introduction to Large Language Models (LLMs)". Given the requirements, I need to write a blog of approximately 2000 words, with the specified subtopics, in an engaging, informative style. The blog should be in HTML format, and I have to use real-world examples, analogies, mermaid.js for diagrams and flowcharts, LaTeX for mathematical equations, and provide text prompts for image generation. I don't think I need to use external sources for this task, but rather rely on my capabilities as a language model to generate the content.
Action: N/A
Action Input: N/A[0m
Observation: N/A is not a valid tool, try one of [Wolfram Alpha, Wikipedia Search].
Thought:[32;1m[1;3mSince this is a task-based question requiring content creation rather than information retrieval, I don't need to use Wolfram Alpha or Wikipedia Search. Instead, I'll utilize my knowledge ba

In [23]:
### Write code to clean response to only include content inside <html> tags
response = response.replace("\n","")
response = response.split("<html>")[1].split("</html>")[0]  # This is the final response

## Write this response in a file saving in blogs folder giving it a title as blog1.html
with open('blogs/blog1.html', 'w') as f:
    f.write(response)

In [20]:
# # Iterate through each row and print all cells in a single statement
# for row in topics_list:

#     final_res.append(response)

