<a href="https://colab.research.google.com/github/mshumer/ai-journalist/blob/main/Claude_Journalist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install newspaper3k



In [2]:
import requests
from bs4 import BeautifulSoup
import newspaper
from newspaper import Article
import ast

# Model name sent as the "model" field of every request to the local
# generate endpoint (http://localhost:11434/api/generate).
MODEL = "llama3:8b" # Replace with your model
# API key interpolated into the serpapi.com search URL in get_search_results().
SERP_API_KEY = "YOUR API KEY"  # Replace with your SERP API key

# Suffix appended to the prompts in get_search_terms() and
# select_relevant_urls(), whose replies are parsed with ast.literal_eval.
just_need_result_message = "(Just give me the results)"

def get_search_terms(topic):
    """Ask the model for search terms to research `topic`.

    Sends one non-streaming request to the local generate endpoint and parses
    the 'response' field of the JSON reply as a Python literal.

    Args:
        topic: The subject the article will be about.

    Returns:
        A list of search-term strings.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        ValueError, SyntaxError: If no Python list can be parsed from the reply.
    """
    term_count = 3
    system_prompt = f"You are a world-class journalist. Generate a list of {term_count} search terms to search for to research and write an article about the topic."
    prompt = f"Please provide a list of {term_count} search terms related to '{topic}' for researching and writing an article. Respond with the search terms in a Python-parseable list, separated by commas.{just_need_result_message}"
    headers = {
        "content-type": "application/json"
    }
    data = {
        "model": MODEL,
        "options": {
           "temperature": 0.5,
           "num_predict": 200
        },
        "system": system_prompt,
        "prompt": prompt,
        "stream": False
    }

    response = requests.post("http://localhost:11434/api/generate", headers=headers, json=data)
    # Fail with the HTTP error itself rather than a KeyError on 'response'.
    response.raise_for_status()
    response_text = response.json()['response'].strip()

    try:
        search_terms = ast.literal_eval(response_text)
    except (ValueError, SyntaxError):
        # The reply is not a bare literal (e.g. it is wrapped in prose or a
        # code fence); fall back to the outermost bracketed list in the text.
        start = response_text.find('[')
        end = response_text.rfind(']')
        if start == -1 or end <= start:
            raise ValueError(
                f"Could not find a list of search terms in model reply: {response_text!r}"
            )
        search_terms = ast.literal_eval(response_text[start:end + 1])

    # A single quoted term parses to a str; wrap it so callers never iterate
    # over its characters.
    if isinstance(search_terms, str):
        search_terms = [search_terms]
    return [str(term) for term in search_terms]

def get_search_results(search_term):
    """Query SerpAPI for `search_term` and return its organic results.

    Args:
        search_term: The query string to search for.

    Returns:
        The value of the 'organic_results' key of the JSON reply, or an empty
        list when the reply has no such key.

    Raises:
        requests.HTTPError: If SerpAPI answers with an error status.
        requests.Timeout: If no reply arrives within the timeout.
    """
    # Pass the query via `params` so requests URL-encodes it; interpolating the
    # raw term into the URL breaks on characters such as '&', '#' or '+'.
    response = requests.get(
        "https://serpapi.com/search.json",
        params={"q": search_term, "api_key": SERP_API_KEY},
        timeout=30,  # requests waits forever by default
    )
    response.raise_for_status()
    data = response.json()
    return data.get('organic_results', [])

def select_relevant_urls(search_results):
    """Let the model pick the most relevant URLs from `search_results`.

    The results are shown to the model as a 1-based numbered list of their
    'link' values; the model replies with the numbers it selects.

    Args:
        search_results: A sequence of dicts, each with a 'link' key.

    Returns:
        A list with the 'link' of every validly selected result, in the order
        the model listed them. Empty when `search_results` is empty.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        ValueError, SyntaxError: If no selection can be parsed from the reply.
    """
    if not search_results:
        # Nothing to choose from; skip the model call entirely.
        return []

    system_prompt = "You are a journalist assistant. From the given search results, select the URLs that seem most relevant and informative for writing an article on the topic."
    search_results_text = "\n".join([f"{i+1}. {result['link']}" for i, result in enumerate(search_results)])
    prompt = f"Search Results:\n{search_results_text}\n\nPlease select the numbers of the URLs that seem most relevant and informative for writing an article on the topic. Respond with the numbers in a Python-parseable list, separated by commas.{just_need_result_message}"
    headers = {
        "content-type": "application/json"
    }
    data = {
        "model": MODEL,
        "options": {
           "temperature": 0.5,
           "num_predict": 200
        },
        "system": system_prompt,
        "prompt": prompt,
        "stream": False
    }
    response = requests.post("http://localhost:11434/api/generate", headers=headers, json=data)
    # Fail with the HTTP error itself rather than a KeyError on 'response'.
    response.raise_for_status()
    response_text = response.json()['response'].strip()

    try:
        numbers = ast.literal_eval(response_text)
    except (ValueError, SyntaxError):
        # The reply is not a bare literal (e.g. it is wrapped in prose or a
        # code fence); fall back to the outermost bracketed list in the text.
        start = response_text.find('[')
        end = response_text.rfind(']')
        if start == -1 or end <= start:
            raise ValueError(
                f"Could not find a list of numbers in model reply: {response_text!r}"
            )
        numbers = ast.literal_eval(response_text[start:end + 1])

    # A lone selection such as "3" parses to a scalar, not a sequence.
    if isinstance(numbers, (int, float, str)):
        numbers = [numbers]

    relevant_urls = []
    for num in numbers:
        try:
            index = int(num) - 1
        except (TypeError, ValueError):
            continue  # not a number; ignore this entry
        # Reject 0 / negatives (would wrap around to the end of the list) and
        # numbers beyond the list (would raise IndexError).
        if 0 <= index < len(search_results):
            relevant_urls.append(search_results[index]['link'])

    return relevant_urls

def get_article_text(url):
    """Download and parse the page at `url`; return the parsed Article's text."""
    page = Article(url)
    # download() must run before parse(); the text is read after parsing.
    page.download()
    page.parse()
    body_text = page.text
    return body_text

def write_article(topic, article_texts):
    """Have the model draft an article on `topic` from the given source texts.

    The texts in `article_texts` are joined with blank lines and embedded in
    the prompt. One non-streaming request is sent to the local generate
    endpoint, and the 'response' field of the JSON reply is returned.
    """
    instructions = "You are a journalist. Write a high-quality, NYT-worthy article on the given topic based on the provided article texts. The article should be well-structured, informative, and engaging."
    sources = "\n\n".join(article_texts)
    request_body = {
        "model": MODEL,
        "options": {"temperature": 0.5, "num_predict": 3000},
        "system": instructions,
        "prompt": (
            f"Topic: {topic}\n\nArticle Texts:\n{sources}\n\n"
            "Please write a high-quality, NYT-worthy article on the topic based on the provided article texts. "
            "The article should be well-structured, informative, and engaging. "
            "Ensure the length is at least as long as a NYT cover story -- at a minimum, 15 paragraphs."
        ),
        "stream": False,
    }
    reply = requests.post(
        "http://localhost:11434/api/generate",
        headers={"content-type": "application/json"},
        json=request_body,
    )
    return reply.json()['response']

def edit_article(article):
    """Run a two-pass edit of `article` and return the rewritten text.

    The first request asks the model for improvement suggestions; the second
    asks it to rewrite the article using those suggestions. Both go to the
    local generate endpoint as non-streaming requests.
    """
    endpoint = "http://localhost:11434/api/generate"
    json_headers = {"content-type": "application/json"}

    def ask_model(system_text, prompt_text):
        # Both passes share the same model options; only the texts differ.
        payload = {
            "model": MODEL,
            "options": {"temperature": 0.5, "num_predict": 3000},
            "system": system_text,
            "prompt": prompt_text,
            "stream": False,
        }
        reply = requests.post(endpoint, headers=json_headers, json=payload)
        return reply.json()['response']

    # Pass 1: collect the editor's suggestions.
    suggestions = ask_model(
        "You are an editor. Review the given article and provide suggestions for improvement. Focus on clarity, coherence, and overall quality.",
        f"Article:\n{article}\n\nPlease review the article and provide suggestions for improvement. Focus on clarity, coherence, and overall quality.",
    )

    # Pass 2: rewrite the original article with those suggestions applied.
    return ask_model(
        "You are an editor. Rewrite the given article based on the provided suggestions for improvement.",
        f"Original Article:\n{article}\n\nSuggestions for Improvement:\n{suggestions}\n\nPlease rewrite the article based on the provided suggestions for improvement.",
    )

# Driver: prompt for a topic, research it via search + model-selected URLs,
# draft an article, and optionally run an automatic edit pass.

# User input
topic = input("Enter a topic to write about: ")
do_edit = input("After the initial draft, do you want an automatic edit? This may improve performance, but is slightly unreliable. Answer 'yes' or 'no'.")

# Generate search terms
search_terms = get_search_terms(topic)
print(f"\nSearch Terms for '{topic}':")
print(", ".join(str(term) for term in search_terms))

# Perform searches and select relevant URLs
relevant_urls = []
for term in search_terms:
    search_results = get_search_results(term)
    urls = select_relevant_urls(search_results)
    relevant_urls.extend(urls)

# Different search terms can surface the same page; drop repeats (keeping the
# first-seen order) so it is not downloaded and fed to the model twice.
relevant_urls = list(dict.fromkeys(relevant_urls))

print('Relevant URLs to read:', relevant_urls)


# Get article text from relevant URLs
article_texts = []
for url in relevant_urls:
    try:
        text = get_article_text(url)
    except Exception as exc:
        # Best effort: one unreadable page must not abort the run. Catching
        # Exception (not a bare except) keeps Ctrl-C working, and the message
        # makes skipped sources visible.
        print(f"Skipping {url}: {exc}")
        continue
    # Very short extractions are not useful as source material.
    if len(text) > 75:
        article_texts.append(text)

print('Articles to reference:', article_texts)

print('\n\nWriting article...')
# Write the article
article = write_article(topic, article_texts)
print("\nGenerated Article:")
print(article)

# Only an answer that starts with 'y' counts as yes; a plain substring check
# would also accept answers such as "no way".
if do_edit.strip().lower().startswith('y'):
    # Edit the article
    edited_article = edit_article(article)
    print("\nEdited Article:")
    print(edited_article)

Enter a topic to write about:  dog
After the initial draft, do you want an automatic edit? This may improve performance, but is slightly unreliable. Answer 'yes' or 'no'. no



Search Terms for 'dog':
dog behavior characteristics, canine health issues prevention, training methods effective dogs
----------
1, 2, 3, 5, 6
----------
1, 3, 4, 5, 8
----------
1, 2, 4, 5, 7
Relevant URLs to read: ['https://www.countyofmerced.com/DocumentCenter/View/37/Dog-Breed-Characteristics-and-Behavior', 'https://en.wikipedia.org/wiki/Dog_behavior', 'https://vcahospitals.com/know-your-pet/dog-behaviorwhats-normal-and-whats-not', 'https://www.quora.com/Can-a-dogs-behavior-be-determined-solely-by-its-breed-or-are-other-factors-at-play', 'https://www.petmd.com/dog/behavior/does-breed-affect-behavior', 'https://vcahospitals.com/know-your-pet/preventive-health-care-guidelines-for-dogs', 'https://www.akcchf.org/canine-health/top-health-concerns/top-health-concerns.html', 'https://hastingsvet.com/6-common-dog-health-problems-prevention-tips-and-why-to-prevent/', 'https://petfriendlybox.com/resource-center/dog-health-problems', 'https://www.avma.org/resources/pet-owners/petcare/preven