<a href="https://colab.research.google.com/github/elf-mouse/ai-journalist/blob/main/Gemini_Journalist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install newspaper3k
!pip install -qU google-generativeai



In [None]:
import requests
from bs4 import BeautifulSoup
import newspaper
from newspaper import Article
import ast
import google.generativeai as genai
from google.generativeai import GenerationConfig

# Model name and credentials shared by every request made in this notebook.
GEMINI_MODEL = "gemini-1.5-pro-latest"
GEMINI_API_KEY = "YOUR API KEY"  # Replace with your Gemini API key
SERP_API_KEY = "YOUR API KEY"  # Replace with your SERP API key

genai.configure(api_key=GEMINI_API_KEY, transport='rest')

# Every listed harm category is paired with the same "BLOCK_NONE" threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Generation settings passed to each generate_content() call below.
generation_config = GenerationConfig(
    candidate_count=1,
    stop_sequences=None,
    max_output_tokens=None,
    temperature=0.5,
    top_p=0.9,
    top_k=40,
)

def get_search_terms(topic, term_count=3):
    """Ask the Gemini model for search terms to research ``topic``.

    Args:
        topic: Subject of the article to be written.
        term_count: Number of search terms to request from the model.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        The Python literal parsed from the model's reply; the prompt asks
        for a list of search-term strings.

    Raises:
        ValueError, SyntaxError: If no Python literal can be recovered from
            the reply.
    """
    system_prompt = f"You are a world-class journalist. Generate a list of {term_count} search terms to search for to research and write an article about the topic."
    prompt = f"Please provide a list of {term_count} search terms related to '{topic}' for researching and writing an article. Respond with the search terms in a Python-parseable list, separated by commas."

    model = genai.GenerativeModel(model_name=GEMINI_MODEL, safety_settings=safety_settings, system_instruction=system_prompt)
    response = model.generate_content(prompt, generation_config=generation_config)
    response_text = response.candidates[0].content.parts[0].text
    # Drop a surrounding ```python ... ``` fence if the reply has one.
    response_data = response_text.strip().removeprefix("```python").removesuffix("```")

    try:
        search_terms = ast.literal_eval(response_data)
    except (ValueError, SyntaxError):
        # A reply with a differently tagged fence, an assignment such as
        # `terms = [...]`, or surrounding prose fails above. Fall back to
        # the outermost [...] span before giving up.
        start = response_data.find("[")
        end = response_data.rfind("]")
        if start == -1 or end < start:
            raise
        search_terms = ast.literal_eval(response_data[start:end + 1])
    return search_terms

def get_search_results(search_term):
    """Query SerpApi for ``search_term`` and return its organic results.

    Args:
        search_term: Free-text query to search for.

    Returns:
        The value of the ``organic_results`` key of the JSON response, or an
        empty list when the response has no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # Pass the query through `params` so requests URL-encodes it; characters
    # such as '&', '#' or '+' in a search term would otherwise corrupt the
    # query string.
    response = requests.get(
        "https://serpapi.com/search.json",
        params={"q": search_term, "api_key": SERP_API_KEY},
        timeout=30,  # without a timeout a stalled connection hangs forever
    )
    response.raise_for_status()
    data = response.json()
    # A response without organic results yields an empty list instead of a
    # bare KeyError.
    return data.get('organic_results', [])

def select_relevant_urls(search_results):
    """Let the Gemini model pick the most relevant URLs from search results.

    Args:
        search_results: Sequence of mappings, each with a ``'link'`` entry.

    Returns:
        A list of the ``'link'`` values the model selected, in the order the
        model listed them. Empty when ``search_results`` is empty.

    Raises:
        ValueError, SyntaxError: If no Python literal can be recovered from
            the model's reply.
    """
    # Nothing to choose from: skip the model call entirely.
    if not search_results:
        return []

    system_prompt = "You are a journalist assistant. From the given search results, select the URLs that seem most relevant and informative for writing an article on the topic."
    search_results_text = "\n".join([f"{i+1}. {result['link']}" for i, result in enumerate(search_results)])
    prompt = f"Search Results:\n{search_results_text}\n\nPlease select the numbers of the URLs that seem most relevant and informative for writing an article on the topic. Respond with the numbers in a Python-parseable list, separated by commas."

    model = genai.GenerativeModel(model_name=GEMINI_MODEL, safety_settings=safety_settings, system_instruction=system_prompt)
    response = model.generate_content(prompt, generation_config=generation_config)
    response_text = response.candidates[0].content.parts[0].text
    # Drop a surrounding ```python ... ``` fence if the reply has one.
    response_data = response_text.strip().removeprefix("```python").removesuffix("```")

    try:
        numbers = ast.literal_eval(response_data)
    except (ValueError, SyntaxError):
        # A reply with a differently tagged fence, an assignment, or
        # surrounding prose fails above. Fall back to the outermost [...]
        # span before giving up.
        start = response_data.find("[")
        end = response_data.rfind("]")
        if start == -1 or end < start:
            raise
        numbers = ast.literal_eval(response_data[start:end + 1])

    # A bare number (e.g. "3") parses to an int rather than a list.
    if isinstance(numbers, int):
        numbers = [numbers]

    relevant_urls = []
    for num in numbers:
        index = int(num) - 1  # the prompt numbers results starting at 1
        # Ignore numbers outside the list: 0 would wrap around to the last
        # result via index -1, and anything too large would raise IndexError.
        if 0 <= index < len(search_results):
            relevant_urls.append(search_results[index]['link'])

    return relevant_urls

def get_article_text(url):
    """Download the page at ``url`` and return its parsed article text.

    Builds a newspaper ``Article`` for the URL, downloads it, parses it and
    returns the resulting ``text`` attribute.
    """
    page = Article(url)
    page.download()
    page.parse()
    return page.text

def write_article(topic, article_texts):
    """Have the Gemini model draft an article on ``topic``.

    Args:
        topic: Subject the article should cover.
        article_texts: Iterable of source-article strings; they are joined
            with blank lines and embedded in the prompt.

    Returns:
        The text of the first part of the first candidate in the reply.
    """
    system_prompt = "You are a journalist. Write a high-quality, NYT-worthy article on the given topic based on the provided article texts. The article should be well-structured, informative, and engaging."
    sources = "\n\n".join(article_texts)
    prompt = f"Topic: {topic}\n\nArticle Texts:\n{sources}\n\nPlease write a high-quality, NYT-worthy article on the topic based on the provided article texts. The article should be well-structured, informative, and engaging. Ensure the length is at least as long as a NYT cover story -- at a minimum, 15 paragraphs."

    journalist = genai.GenerativeModel(
        model_name=GEMINI_MODEL,
        safety_settings=safety_settings,
        system_instruction=system_prompt,
    )
    reply = journalist.generate_content(prompt, generation_config=generation_config)
    return reply.candidates[0].content.parts[0].text

def edit_article(article):
    """Run a two-pass edit of ``article`` with the Gemini model.

    The first request asks for improvement suggestions; the second asks for
    a rewrite of the original article based on those suggestions.

    Args:
        article: Text of the draft article.

    Returns:
        The rewritten article text, with surrounding whitespace and any
        leading "```python" / trailing "```" markers removed.
    """

    def _ask_editor(instruction, request):
        # One round-trip: build a model with the given system instruction,
        # send the request, and return the cleaned-up reply text.
        editor = genai.GenerativeModel(
            model_name=GEMINI_MODEL,
            safety_settings=safety_settings,
            system_instruction=instruction,
        )
        reply = editor.generate_content(request, generation_config=generation_config)
        raw_text = reply.candidates[0].content.parts[0].text
        return raw_text.strip().removeprefix("```python").removesuffix("```")

    # Pass 1: collect the editor's suggestions.
    suggestions = _ask_editor(
        "You are an editor. Review the given article and provide suggestions for improvement. Focus on clarity, coherence, and overall quality.",
        f"Article:\n{article}\n\nPlease review the article and provide suggestions for improvement. Focus on clarity, coherence, and overall quality.",
    )

    # Pass 2: rewrite the original using those suggestions.
    return _ask_editor(
        "You are an editor. Rewrite the given article based on the provided suggestions for improvement.",
        f"Original Article:\n{article}\n\nSuggestions for Improvement:\n{suggestions}\n\nPlease rewrite the article based on the provided suggestions for improvement.",
    )

# Driver: ask for a topic, research it through web search, draft an article
# with Gemini, and optionally run an automatic editing pass.

# User input
topic = input("Enter a topic to write about: ")
do_edit = input("After the initial draft, do you want an automatic edit? This may improve performance, but is slightly unreliable. Answer 'yes' or 'no'.")

# Generate search terms
search_terms = get_search_terms(topic)
print(f"\nSearch Terms for '{topic}':")
# str() guards the join against non-string items in the parsed reply.
print(", ".join(str(term) for term in search_terms))

# Perform searches and select relevant URLs
relevant_urls = []
for term in search_terms:
    search_results = get_search_results(term)
    urls = select_relevant_urls(search_results)
    relevant_urls.extend(urls)

# The same URL can be selected for several search terms; keep only the first
# occurrence so no page is downloaded or fed to the model twice.
relevant_urls = list(dict.fromkeys(relevant_urls))

print('Relevant URLs to read:', relevant_urls)


# Get article text from relevant URLs
article_texts = []
for url in relevant_urls:
    try:
        text = get_article_text(url)
    except Exception as exc:
        # Best effort: one unreachable or unparseable page should not abort
        # the run, but report it instead of hiding the failure. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt.
        print(f"Skipping {url}: {exc}")
        continue
    # Very short extractions are treated as failed/empty pages.
    if len(text) > 75:
        article_texts.append(text)

print('Articles to reference:', article_texts)

print('\n\nWriting article...')
# Write the article
article = write_article(topic, article_texts)
print("\nGenerated Article:")
print(article)

# Accept "y", "yes", "Yes", "YES", ...; a plain substring test would miss
# capitalised answers and wrongly match replies such as "no way".
if do_edit.strip().lower().startswith('y'):
    # Edit the article
    edited_article = edit_article(article)
    print("\nEdited Article:")
    print(edited_article)