# First draft article

## Packages, LLM, and vectorstore

### Define packages

In [5]:
#Packages and modules
#Get the required modules/packages
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import re
from langchain.schema import Document 
import os
from dotenv import load_dotenv
from langchain_text_splitters import TextSplitter
from docx import Document
from langchain.schema import Document as LangChainDocument


### Define the LLM

In [6]:
import os

from langchain_openai import ChatOpenAI

# Load variables from a local .env file (if one exists) into the process
# environment. By default load_dotenv() does not override variables that are
# already set, so an exported OPENAI_API_KEY still takes precedence.
load_dotenv()

# Access the API key from the environment variables
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Surface the problem here rather than as an opaque authentication error
    # in a later cell.
    print("Warning: OPENAI_API_KEY is not set; calls to the OpenAI API are expected to fail.")

# Chat model shared by every RAG chain in this notebook.
# NOTE(review): api_key is not passed explicitly; the OpenAI integration is
# presumably reading OPENAI_API_KEY from the environment itself - confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

### Define/load vectorstore

In [7]:
# Load the vectorstore
persist_directory = "chroma_storage"

# Record whether the store exists *before* constructing Chroma, in case the
# constructor creates the directory when it is missing (TODO confirm). Without
# this check the success message below is printed even for a brand-new,
# empty store, and every later retrieval silently returns no context.
store_exists = os.path.isdir(persist_directory)

embedding_model = OpenAIEmbeddings()

vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory
)

if store_exists:
    print("Vector store loaded from disk.")
else:
    print(
        f"Warning: '{persist_directory}' did not exist before loading; "
        "the vector store is probably empty."
    )

Vector store loaded from disk.


## Step 1: RAG looks at interview and provides potential journalistic angles

In [6]:
# Define the retriever to get the documents from the vectorstore.
# Only chunks whose metadata has type == "interview" are searched in this step.
# To inspect what comes back, run retriever.invoke("<query>") in a scratch cell
# and look at each document's page_content and metadata.
retriever = vectorstore.as_retriever(
    search_type="mmr",  # Use Maximal Marginal Relevance (MMR) for search
    search_kwargs={
        "k": 15,             # number of chunks handed to the prompt
        "lambda_mult": 0.7,  # MMR trade-off: closer to 1 = less diversity, closer to 0 = more
        "fetch_k": 30,       # candidates fetched before MMR picks the final k
        "filter": {"type": "interview"},
    },
)


# Build the chat prompt used by the RAG chain.
def create_prompt(system_prompt, human_prompt):
    """Turn two template strings into a ChatPromptTemplate.

    Args:
        system_prompt: Template text for the system message.
        human_prompt: Template text for the human message.

    Returns:
        A ChatPromptTemplate holding the system message followed by the
        human message.
    """
    messages = [
        SystemMessagePromptTemplate.from_template(system_prompt),
        HumanMessagePromptTemplate.from_template(human_prompt),
    ]
    return ChatPromptTemplate.from_messages(messages)

# System prompt: restricts the model to the supplied context and asks for
# attributed, verbatim quotes. The {context} placeholder is filled with the
# retrieved documents when the prompt template is rendered.
system_prompt = """You are an AI assistant to a journalist.
Your goal is to generate accurate, concise answers using only the provided context.
Do not make assumptions or add information that is not explicitly present in the context.

When referencing information, quote the source directly using quotation marks and preface each quote with an attribution (e.g., "According to [source],..."). Only use exact phrases from the context when quoting to ensure accuracy.

If quoting is not possible, you may paraphrase or summarize carefully, but avoid introducing any new interpretation. Aim to maintain the original meaning of the content as closely as possible.

Here is the context: {context}
"""

# Human prompt: nothing but the question itself.
human_prompt = "{question}"

# Combine both templates into one chat prompt using the create_prompt function.
prompt_template = create_prompt(system_prompt, human_prompt)

# Collapse the documents fetched by the retriever into one context string.
def format_docs(docs):
    """Join the text of all documents, separated by one blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with every ``page_content`` in order; an empty
        string when ``docs`` is empty. Metadata is not included.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# The question serves both as the retrieval query and as the human message.
question = """What are 3 potential journalistic and newsworthy angles in the interview
between Lex Fridman and Roman Yampolskiy? Describe the angle in a single sentence 
and provide arguments based in the context for each angle."""

# Set up the RAG chain:
#   1. The retriever takes the question and fetches the relevant documents
#      from the vector store; format_docs turns them into one string.
#   2. In parallel, the question itself is passed through unchanged.
#   3. Context and question are inserted into the prompt template.
#   4. The rendered prompt is sent to the LLM.
#   5. The LLM output is parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# Invoke the chain and show the answer.
response = rag_chain.invoke(question)
print(response)

1. **The Threat of Superintelligent AI**: The conversation highlights the potential dangers posed by superintelligent AI, with Yampolskiy stating, "If we create general superintelligences, I don't see a good long-term outcome for humanity," emphasizing the urgency of addressing the risks associated with advanced AI systems.

2. **Value Alignment in Hybrid Human-AI Systems**: The discussion raises critical questions about the challenges of aligning values between humans and AI, as Yampolskiy and Fridman explore the complexities of creating a system that satisfies "eight billion humans plus animals, aliens," showcasing the difficulty of achieving consensus in a diverse world.

3. **Implications of Technological Unemployment**: Yampolskiy warns of a future where "complete technological unemployment" could disrupt society, suggesting that the rapid advancement of AI could lead to significant societal changes and the need for new frameworks for human purpose and employment, making it a pres

## Step 2: Expand article by providing additional context

In [None]:
# The journalistic angle for this draft is set in the `angle` variable further
# down in this cell.

# Define the retriever to get the documents from the vectorstore.
# No metadata filter is passed here (unlike Step 1), and more chunks are requested.
retriever = vectorstore.as_retriever(
    search_type = "mmr",  # Use Maximal Marginal Relevance (MMR) for search
    search_kwargs={"k": 30,
                   "lambda_mult": 0.7,
                   "fetch_k": 50})

# System prompt for the article draft: same grounding/quoting rules as Step 1,
# plus a style guide (subheadings, beginner-level explanations, footnotes).
# {context} is replaced by the formatted retrieved documents.
system_prompt = """You are an AI assistant to a journalist.
Your goal is to generate accurate, concise answers using only the provided context.
Do not make assumptions or add information that is not explicitly present in the context.

When referencing information, quote the source directly using quotation marks "QUOTE" and preface each quote with an attribution (e.g., "According to [source],..."). Only use exact phrases from the context when quoting to ensure accuracy.

If quoting is not possible, you may paraphrase or summarize carefully, but avoid introducing any new interpretation. Aim to maintain the original meaning of the content as closely as possible.

STYLE GUIDE: 
1. Use subheadings to organize your article. Each section should only use information from one context type like "article" or "interview".
2. Take a slow, detailed approach to explaining complex concepts.
3. Always explain new concepts as if the reader has no prior knowledge.
4. When available, use examples to illustrate points.
5. Whenever you use information from a source, include a footnote number at the end of the relevant section. At the bottom of the article, provide a corresponding list of footnotes with each number matched to its respective source (type, title, and date).

Here is the context: {context}
"""
# Human prompt: nothing but the question itself.
human_prompt = "{question}"

# Prompt factory for the RAG chain (same behaviour as the Step 1 definition).
def create_prompt(system_prompt, human_prompt):
    """Build a two-message ChatPromptTemplate from raw template strings.

    Args:
        system_prompt: Template text for the system message.
        human_prompt: Template text for the human message.

    Returns:
        A ChatPromptTemplate with the system message first and the human
        message second.
    """
    return ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(system_prompt),
            HumanMessagePromptTemplate.from_template(human_prompt),
        ]
    )



# The journalistic angle the draft should argue; interpolated into the question below.
angle = """The world is not prepared for Artificial Super Intelligence (ASI)
or Artificial General Intelligence (AGI), and policy is not moving to control and monitor
the businesses who lead the field."""

# The question is used both as the retrieval query and as the human message.
question = f"""Based on the journalistic angle: {angle}, provide a draft for an article
based on the provided context. Assume the reader doesn't know anything about AI AT ALL
and thus needs to be introduced to every new concept"""

# Rebuild the prompt template from the system/human prompts defined above in this cell.
prompt_template = create_prompt(system_prompt, human_prompt)

# Format the documents fetched by the retriever into a single context string.
def format_docs(docs):
    """Render each document's metadata header and text for the prompt.

    The system prompt in this cell asks for footnotes listing each source's
    type, title and date, and for sections that use a single context type.
    The model can only comply if those metadata fields are part of the
    context, so they are written in front of every chunk.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict-like, may be empty or missing).

    Returns:
        One string with the documents in order, separated by a blank line.
        Each document is rendered as ``Type``/``Title``/``Date`` lines
        followed by a ``Content`` line. Missing fields are rendered as
        "<Label> not specified". Returns "" for an empty input.
    """
    fields = (("Type", "type"), ("Title", "title"), ("Date", "date"))
    formatted_docs = []
    for doc in docs:
        metadata = getattr(doc, "metadata", None) or {}
        lines = [
            f"{label}: {metadata.get(key, label + ' not specified')}"
            for label, key in fields
        ]
        lines.append(f"Content: {doc.page_content}")
        formatted_docs.append("\n".join(lines))
    return "\n\n".join(formatted_docs)

# Set up the RAG chain
#1. First the retriever takes the question and fetches the relevant documents from the vector store
#   (format_docs then turns them into a single context string)
#2. The question is passed through unchanged to the next step of the chain
#3. The question and context (documents) are inserted into the prompt template
#4. The rendered prompt is sent to the LLM
#5. The output from the LLM is parsed into a string
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm  # the chat model defined in the "Define the LLM" cell
    | StrOutputParser()
)

# Run the chain; the question doubles as the retrieval query.
response = rag_chain.invoke(question)

print(response)



# The World is Not Prepared for Artificial General Intelligence

## Understanding Artificial Intelligence and Its Evolution

Artificial Intelligence (AI) refers to machines or software designed to perform tasks that typically require human intelligence. These tasks can include understanding natural language, recognizing patterns, making decisions, and even learning from experience. Within the realm of AI, there are two major categories: Artificial Narrow Intelligence (ANI) and Artificial General Intelligence (AGI).

ANI, which is widely used today, refers to AI systems that are trained to perform specific tasks. For example, Google’s DeepMind can play chess at a high level, while Apple's Siri can assist with simple queries and commands. However, these systems cannot adapt their knowledge to new, unfamiliar tasks. In contrast, AGI represents a more advanced and ambitious goal: creating machines that can learn and perform any cognitive task that a human can, effectively exceeding human i

In [14]:
# Save response as a Word file.
# A .docx file is a zipped XML package, so writing the raw text with
# open(..., "w") produces a file that Word cannot open. python-docx builds a
# valid document instead. The import is aliased because the name `Document`
# is also imported from langchain.schema in the import cell.
from docx import Document as DocxDocument

article_doc = DocxDocument()
# One Word paragraph per blank-line-separated block of the response.
for paragraph_text in response.split("\n\n"):
    if paragraph_text.strip():
        article_doc.add_paragraph(paragraph_text)

# NOTE(review): the later drafts save to this same file name, so each run overwrites it.
article_doc.save("article.docx")

#### Checking the retrieved docs that the article is based on

In [18]:
# Fetch the chunks the retriever returns for the current question
retrieved_docs = retriever.invoke(question)

# Destination of the human-readable dump
file_path = "documents_and_metadata_output.txt"

# Assemble the whole report in memory first, then write it in one go
report_parts = ["Documents and Metadata Provided as Context:\n\n"]
for position, document in enumerate(retrieved_docs, start=1):
    report_parts.append(f"Document {position}\n")
    report_parts.append("Content:\n")
    report_parts.append(document.page_content + "\n\n")  # full content, not a preview

    report_parts.append("Metadata:\n")
    report_parts.extend(
        f"{field}: {entry}\n" for field, entry in document.metadata.items()
    )

    report_parts.append("-" * 50 + "\n\n")

with open(file_path, "w", encoding="utf-8") as file:
    file.write("".join(report_parts))

print(f"Output successfully saved to {file_path}")

Output successfully saved to documents_and_metadata_output.txt


In [11]:
# Render the prompt the same way the chain does: the chain pipes the retrieved
# documents through format_docs before filling {context}. Passing the raw list
# would insert the repr of the Document objects instead, so the printed prompt
# would not match what the LLM actually receives.
formatted_prompt = prompt_template.format(
    context=format_docs(retrieved_docs),
    question=question,
)
print("Final Prompt to LLM:", formatted_prompt)

Final Prompt to LLM: System: You are an AI assistant to a journalist.
Your goal is to generate accurate, concise answers using only the provided context.
Do not make assumptions or add information that is not explicitly present in the context.

When referencing information, quote the source directly using quotation marks and preface each quote with an attribution (e.g., "According to [source],..."). Only use exact phrases from the context when quoting to ensure accuracy.

If quoting is not possible, you may paraphrase or summarize carefully, but avoid introducing any new interpretation. Aim to maintain the original meaning of the content as closely as possible.


Human: Based on the journalistic angle: ethical implications of superintelligent AI and artificial general
intelligence (AGI)., provide a draft for an article
based on the provided context.


# Second Draft (for giving advice)

In [21]:
# The journalistic angle for this draft is set in the `angle` variable further
# down in this cell.

# Define the retriever to get the documents from the vectorstore.
# No metadata filter is passed, so the search is not restricted by document type.
retriever = vectorstore.as_retriever(
    search_type = "mmr",  # Use Maximal Marginal Relevance (MMR) for search
    search_kwargs={"k": 30,
                   "lambda_mult": 0.7,
                   "fetch_k": 50})

# System prompt for the advice draft: answers must be grounded in the context
# and cite sources via the metadata fields named in the prompt.
# {context} is replaced by the formatted retrieved documents.
system_prompt = """You are an AI assistant to a journalist.
Your goal is to generate accurate, concise answers using only the provided context.
Do not make assumptions or add information that is not explicitly present in the context.

When referencing or using information, quote the source directly using the information in the metadata ("source", "type", "title", "date").
Use quotation marks "QUOTE" when possible. Only use exact phrases from the context when quoting to ensure accuracy.

Here is the context: {context}
"""
# Human prompt: nothing but the question itself.
human_prompt = "{question}"

# Prompt factory for the RAG chain (same behaviour as the earlier definitions).
def create_prompt(system_prompt, human_prompt):
    """Wrap a system and a human template string in a ChatPromptTemplate.

    Args:
        system_prompt: Template text for the system message.
        human_prompt: Template text for the human message.

    Returns:
        A ChatPromptTemplate containing the system message, then the human
        message.
    """
    system_part = SystemMessagePromptTemplate.from_template(system_prompt)
    human_part = HumanMessagePromptTemplate.from_template(human_prompt)
    message_templates = [system_part, human_part]
    return ChatPromptTemplate.from_messages(message_templates)



# The journalistic angle the advice should be built around; interpolated into the question below.
angle = """The world is not prepared for Artificial Super Intelligence (ASI)
or Artificial General Intelligence (AGI), and policy is not moving to control and monitor
the businesses who lead the field. The threats of AGI are not only existential, but include suffering and ikigai as well."""

# Ask for structural advice instead of a finished article. The question is used
# both as the retrieval query and as the human message.
question = f"""Based on the journalistic angle: {angle}, provide suggestions for how 
the journalist may structure his article. This should include: 
1. What order the information should be presented in, and what sections the article could include.
2. What information should be included in each section (with sources: type, title, date - this IS found in the metadata).
3. Suggestions for questions that haven't been answered, that the journalist might go investigate.
"""

# Rebuild the prompt template from the system/human prompts defined above in this cell.
prompt_template = create_prompt(system_prompt, human_prompt)

# Format the documents fetched by the retriever into a single context string.
def format_docs(docs):
    """Render each document's metadata header and text for the prompt.

    The prompts in this cell tell the model to cite the metadata fields
    "source", "type", "title" and "date" and state that they are present.
    They only are if this function writes them into the context, so every
    chunk is prefixed with those four fields.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict-like, may be empty or missing).

    Returns:
        One string with the documents in order, separated by a blank line.
        Each document is rendered as ``Source``/``Type``/``Title``/``Date``
        lines followed by a ``Content`` line. Missing fields are rendered
        as "<Label> not specified". Returns "" for an empty input.
    """
    fields = (
        ("Source", "source"),
        ("Type", "type"),
        ("Title", "title"),
        ("Date", "date"),
    )
    formatted_docs = []
    for doc in docs:
        metadata = getattr(doc, "metadata", None) or {}
        lines = [
            f"{label}: {metadata.get(key, label + ' not specified')}"
            for label, key in fields
        ]
        lines.append(f"Content: {doc.page_content}")
        formatted_docs.append("\n".join(lines))
    return "\n\n".join(formatted_docs)

# Set up the RAG chain
#1. First the retriever takes the question and fetches the relevant documents from the vector store
#   (format_docs then turns them into a single context string)
#2. The question is passed through unchanged to the next step of the chain
#3. The question and context (documents) are inserted into the prompt template
#4. The rendered prompt is sent to the LLM
#5. The output from the LLM is parsed into a string
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm  # the chat model defined in the "Define the LLM" cell
    | StrOutputParser()
)

# Run the chain; the question doubles as the retrieval query.
response = rag_chain.invoke(question)

print(response)



**Article Structure Suggestions:**

1. **Introduction**
   - **Information to Include:** A brief overview of AGI and ASI, highlighting the rapid advancements in AI technology and the lack of regulatory measures. Introduce the stakes involved, including existential threats and impacts on human well-being.
   - **Source:** “go slowly and deliberately” in AI development. "The risks associated with Artificial General Intelligence: A systematic review" by Scott McLean et al., 2021.

2. **Understanding AGI and ASI**
   - **Information to Include:** Define AGI and ASI, explaining how they differ from current AI systems. Discuss the implications of achieving AGI, including the potential for self-improvement and the risks involved.
   - **Source:** "Artificial General Intelligence—machines that can learn and perform any cognitive task that a human can," "The risks associated with Artificial General Intelligence: A systematic review" by Scott McLean et al., 2021.

3. **Current State of Regulatio

In [22]:
# Save response as a Word file.
# A .docx file is a zipped XML package, so writing the raw text with
# open(..., "w") produces a file that Word cannot open. python-docx builds a
# valid document instead. The import is aliased because the name `Document`
# is also imported from langchain.schema in the import cell.
from docx import Document as DocxDocument

article_doc = DocxDocument()
# One Word paragraph per blank-line-separated block of the response.
for paragraph_text in response.split("\n\n"):
    if paragraph_text.strip():
        article_doc.add_paragraph(paragraph_text)

# NOTE(review): the other drafts save to this same file name, so each run overwrites it.
article_doc.save("article.docx")

# Third Draft: more structured approach to giving advice

In [None]:
# The journalistic angle for this draft is set in the `angle` variable further
# down in this cell.

# Define the retriever to get the documents from the vectorstore.
# No metadata filter is passed, so the search is not restricted by document type.
retriever = vectorstore.as_retriever(
    search_type = "mmr",  # Use Maximal Marginal Relevance (MMR) for search
    search_kwargs={"k": 25,
                   "lambda_mult": 0.7,
                   "fetch_k": 50})

# System prompt for the structured-advice draft: names the metadata fields
# (Title, Source, Date) the model must cite and tells it to say so explicitly
# when something is not in the context.
# {context} is replaced by the formatted retrieved documents.
system_prompt = """You are an AI assistant to a journalist.
Your goal is to generate accurate, concise answers using only the provided context.
Do not make assumptions or add information that is not explicitly present in the context.

When referencing or using information, quote the source directly using the metadata fields:
- Title: Refer to the title of the source.
- Source: Mention the publication or source name.
- Date: Include the publication date.

Use quotation marks "QUOTE" for any phrases or sentences quoted directly from the context. 
If you are unsure about a piece of information, explicitly state that it is not mentioned in the context.

Here is the context: {context}
"""
# Human prompt: nothing but the question itself.
human_prompt = "{question}"

# Prompt factory for the RAG chain (same behaviour as the earlier definitions).
def create_prompt(system_prompt, human_prompt):
    """Convert plain template strings into the chat prompt the model expects.

    Args:
        system_prompt: Template text for the system message.
        human_prompt: Template text for the human message.

    Returns:
        A ChatPromptTemplate made of the system message followed by the
        human message.
    """
    templates = (
        SystemMessagePromptTemplate.from_template(system_prompt),
        HumanMessagePromptTemplate.from_template(human_prompt),
    )
    return ChatPromptTemplate.from_messages(list(templates))



# The journalistic angle for the section suggestions; interpolated into the question below.
angle = """The world is not prepared for Artificial Super Intelligence (ASI)
or Artificial General Intelligence (AGI), and policy is not moving to control and monitor
the businesses leading the field. Additionally, what are the threats of AGI?"""

# Ask for exactly five sections, each tied to sources named by their metadata.
# The question is used both as the retrieval query and as the human message.
question = f"""Based on the journalistic angle: {angle}, suggest 5 article sections and associate
relevant sources (from the metadata fields "title" and "source") for each section.
If some sections do not have corresponding sources, explicitly mention this."""

# Rebuild the prompt template from the system/human prompts defined above in this cell.
prompt_template = create_prompt(system_prompt, human_prompt)

# Format the documents fetched by the retriever into a single context string
# that carries each chunk's title, source and date alongside its text.
def format_docs(docs):
    """Render every document as a metadata header, its content and a separator.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and a
            ``metadata`` mapping.

    Returns:
        One string with the documents in order, separated by a blank line.
        Each document is rendered as ``Title``/``Source``/``Date`` lines, a
        ``Content`` line and a closing line of dashes. Missing metadata
        fields are rendered as "<Field> not specified". Returns "" for an
        empty input.
    """
    formatted_docs = []
    for doc in docs:
        metadata = doc.metadata
        formatted_docs.append(
            f"Title: {metadata.get('title', 'Title not specified')}\n"
            f"Source: {metadata.get('source', 'Source not specified')}\n"
            f"Date: {metadata.get('date', 'Date not specified')}\n"
            # The trailing newline keeps the separator on its own line; without
            # it the dashes are glued onto the last word of the content.
            f"Content: {doc.page_content}\n"
            "---------------"
        )
    return "\n\n".join(formatted_docs)

# Set up the RAG chain
#1. First the retriever takes the question and fetches the relevant documents from the vector store
#   (format_docs then turns them into a single context string)
#2. The question is passed through unchanged to the next step of the chain
#3. The question and context (documents) are inserted into the prompt template
#4. The rendered prompt is sent to the LLM
#5. The output from the LLM is parsed into a string
from langchain.schema.runnable import RunnableMap

# Same pipeline as the earlier drafts, with the input mapping written as an
# explicit RunnableMap instead of a plain dict.
rag_chain = (
    RunnableMap({
        "context": retriever | format_docs, 
        "question": RunnablePassthrough()
    })
    | prompt_template
    | llm  # the chat model defined in the "Define the LLM" cell
    | StrOutputParser()
)

def validate_response(response):
    """Check that every blank-line-separated section names a title and a source.

    The response is split on blank lines, and each piece must contain both
    the literal text "Title:" and the literal text "Source:". The first piece
    that lacks either marker is printed and the check stops.

    NOTE(review): introductory sentences and any section that only uses
    "Source:" also count as failures under this rule - confirm that this
    strictness is intended.

    Args:
        response: The LLM answer as a single string.

    Returns:
        True when every section contains both markers, otherwise False.
    """
    required_markers = ("Title:", "Source:")
    for section in response.split("\n\n"):
        has_all_markers = all(marker in section for marker in required_markers)
        if not has_all_markers:
            print(f"Validation failed for section: {section}")
            return False
    return True

# Run the chain; the question doubles as the retrieval query.
response = rag_chain.invoke(question)

# validate_response prints the first offending section itself; this adds the
# overall verdict.
is_valid = validate_response(response)
if not is_valid:
    print("Response validation failed!")

print(response)

Validation failed for section: Here are five suggested article sections along with their relevant sources:
Response validation failed!
Here are five suggested article sections along with their relevant sources:

1. **Introduction: The Imminent Threat of AGI and ASI**
   - Source: "Silicon Valley Takes Artificial General Intelligence Seriously—Washington Must Too," Time Magazine

2. **Current State of Policy and Regulation**
   - Source: "Silicon Valley Takes Artificial General Intelligence Seriously—Washington Must Too," Time Magazine

3. **The Risks Associated with AGI**
   - Source: "The risks associated with Artificial General Intelligence: A systematic review," Journal of Experimental & Theoretical Artificial Intelligence

4. **Potential Catastrophic Outcomes of AGI**
   - Source: "Silicon Valley Takes Artificial General Intelligence Seriously—Washington Must Too," Time Magazine
   - Source: "The risks associated with Artificial General Intelligence: A systematic review," Journal o

In [16]:
# Save response as a Word file.
# A .docx file is a zipped XML package, so writing the raw text with
# open(..., "w") produces a file that Word cannot open. python-docx builds a
# valid document instead. The import is aliased because the name `Document`
# is also imported from langchain.schema in the import cell.
from docx import Document as DocxDocument

article_doc = DocxDocument()
# One Word paragraph per blank-line-separated block of the response.
for paragraph_text in response.split("\n\n"):
    if paragraph_text.strip():
        article_doc.add_paragraph(paragraph_text)

# NOTE(review): the earlier drafts save to this same file name, so each run overwrites it.
article_doc.save("article.docx")

In [18]:
# Fetch the chunks the retriever returns for the current question
retrieved_docs = retriever.invoke(question)

# Destination of the prompt dump
file_path = "full_prompt_with_context.txt"

# Format the context with the same format_docs function the chain uses
formatted_docs = format_docs(retrieved_docs)

# Lay out system message, human message and context as labelled sections.
# Note that system_prompt is written as the raw template, so its {context}
# placeholder appears literally and the context follows in its own section.
full_prompt = (
    "\nSystem Message:\n"
    f"{system_prompt}\n"
    "\nHuman Message:\n"
    f"{question}\n"
    "\nContext Provided to the LLM:\n"
    f"{formatted_docs}\n"
)

# Write the header and the prompt in a single call
with open(file_path, "w", encoding="utf-8") as file:
    file.write("Full Prompt Sent to the LLM:\n\n" + full_prompt)

print(f"Full prompt successfully saved to {file_path}")

Full prompt successfully saved to full_prompt_with_context.txt
