In [143]:
# !pip install openai==0.28
from openai import OpenAI
import re
import httpx
import os
import json
from getpass import getpass
import requests
from bs4 import BeautifulSoup
from chatbot import ChatBot
from scraper import *

openai_api_key = getpass("🔑 Enter your OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = openai_api_key


In [144]:
class WikiBot:
    
    def __init__(self,
                 paths={'general' : 'prompts/general_wiki_prompt.txt',
                        'directed' : 'prompts/directed_wiki_prompt.txt',
                        'repeat' : 'prompts/repeat_wiki_prompt.txt'}):
        self.prompts = {}
        for variant in paths:
            self.prompts[variant] = self.read_prompt(paths[variant])
        
    def read_prompt(self, path):
        with open(path) as f:
            ret = '\n'.join(f.readlines())
        return ret
    
    def get_wikipedia(self, topic):
        response = httpx.get("https://en.wikipedia.org/w/api.php", params={
            "action": "query",
            "list": "search",
            "srsearch": topic,
            "format": "json"
        })

        search_results = response.json().get("query", {}).get("search", [])
        if not search_results:
            return "No results found."

        first_result_title = search_results[0]["title"]
        page_url = f"https://en.wikipedia.org/wiki/{first_result_title.replace(' ', '_')}"
        return self.scrape_wikipedia_article(page_url)

    def scrape_wikipedia_article(self, url):
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(f"Failed to load page {url}")

        soup = BeautifulSoup(response.content, 'html.parser')

        content_div = soup.find(id='mw-content-text')
        if not content_div:
            raise Exception("Failed to find main content of the article")

        paragraphs = content_div.find_all('p')
        article_text = "".join([p.text for p in paragraphs if p.get_text().strip() != ""])
        return article_text
    
    def get_query(self, role, message):
        return {'role': role, 'content': message}
        
    def __call__(self, variant, topic, question=None):
        wiki_article = self.get_wikipedia(topic)
        if variant == 'general': prompt = wiki_article
        else: prompt = f'QUESTION: {question}\n\nARTICLE: {wiki_article}'
        
        result = self.execute(prompt, variant=variant)
        return result
    
    def execute(self, prompt, variant):
        messages = [self.get_query('system', self.prompts[variant]),
                    self.get_query('user', prompt)]
        print(messages)
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages
        )
        return response.choices[0].message.content

In [157]:
class RepeatBot:
    
    def __init__(self, path='prompts/repeat_wiki_prompt.txt'):
        self.prompts = {}
        self.prompt = self.read_prompt(path)
        
    def read_prompt(self, path):
        with open(path) as f:
            ret = '\n'.join(f.readlines())
        return ret
    
    def get_wikipedia(self, topic):
        response = httpx.get("https://en.wikipedia.org/w/api.php", params={
            "action": "query",
            "list": "search",
            "srsearch": topic,
            "format": "json"
        })

        search_results = response.json().get("query", {}).get("search", [])
        if not search_results:
            return "No results found."

        first_result_title = search_results[0]["title"]
        page_url = f"https://en.wikipedia.org/wiki/{first_result_title.replace(' ', '_')}"
        return self.scrape_wikipedia_article(page_url)

    def scrape_wikipedia_article(self, url):
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(f"Failed to load page {url}")

        soup = BeautifulSoup(response.content, 'html.parser')

        content_div = soup.find(id='mw-content-text')
        if not content_div:
            raise Exception("Failed to find main content of the article")

        paragraphs = content_div.find_all('p')
        article_text = '\n'.join([p.text for p in paragraphs if p.get_text().strip() != ""])
        return article_text
    
    def get_query(self, role, message):
        return {'role': role, 'content': message}
        
    def __call__(self, topic, question=None):
        wiki_article = self.get_wikipedia(topic)
        prompt = f'QUESTION: {question}\n\nARTICLE: {wiki_article}'
        print(wiki_article)
        # result = self.execute(prompt, variant=variant)
        # return result
    
    def execute(self, prompt, variant):
        messages = [self.get_query('system', self.prompts[variant]),
                    self.get_query('user', prompt)]
        print(messages)
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages
        )
        return response.choices[0].message.content

In [None]:
class WikiScraper:
    
    def __init__(self, topic):
        self.response = None
        self.soup = None
        self.full_text = None
        self.title = None
        self.page_url = None
        self.topic = topic
        self.find_article()
        self.scrape_article()
        
    def get_subsection(self, header):
        ...
        
    def find_article(self):
        self.response = httpx.get("https://en.wikipedia.org/w/api.php", params={
            "action": "query",
            "list": "search",
            "srsearch": self.topic,
            "format": "json"
        })

        search_results = self.response.json().get("query", {}).get("search", [])
        if not search_results:
            raise Exception("No results found.")

        self.title = search_results[0]["title"]
        self.page_url = f"https://en.wikipedia.org/wiki/{self.title.replace(' ', '_')}"
    
    def scrape_article(self):
        self.response = requests.get(self.page_url)
        if self.response.status_code != 200:
            raise Exception(f"Failed to load page {self.page_url}")

        self.soup = BeautifulSoup(self.response.content, 'html.parser')

        content_div = self.soup.find(id='mw-content-text')
        if not content_div:
            raise Exception("Failed to find main content of the article")

        paragraphs = content_div.find_all('p')
        self.full_text = '\n'.join([p.text for p in paragraphs if p.get_text().strip() != ""])

In [152]:
wikibot = WikiBot()

In [153]:
response = wikibot(variant='directed', topic='Dogs', question='What is dog herding?')

[{'role': 'system', 'content': 'You are a \'directed summarizing\' bot. Your job is to take as input a question and a Wikipedia article, and provide a \'directed summary\'. Specifically, this directed summary should contain instances in the article that relate to the inputted question. Please return this summary in bullet point format. Do not worry about accessing the articles, it will be provided to you. Your only responsibility is to summarize the input and return the summary in bullet point format. \n\n\n\nAn example input you might recieve would be the following:\n\n\n\nQUESTION: What is AI?\n\n\n\nARTICLE: Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and uses learning and intelligence to take actions that maximize their chances of achieving defined goals.[1] Such

In [154]:
print(response)

- Dogs have been bred and trained for various roles, including hunting, herding, pulling loads, protection, assisting police and the military, companionship, therapy, and aiding disabled individuals

- Dogs have been selectively bred over millennia for different behaviors, sensory capabilities, and physical attributes

- Dogs have a natural instinct for herding livestock, which has been bred and trained for specific breeds like collies and sheepdogs

- Dog communication involves various behaviors like eye gaze, facial expressions, vocalization, body posture, and gustatory communication

- Eating dog meat is a social taboo in most parts of the world, with debates over banning its consumption

- Dogs have been a part of therapy to aid individuals with disabilities, such as guide dogs, hearing dogs, and mobility assistance dogs

- Dogs have an established history as symbols of guidance, protection, loyalty, and love in various cultures and mythologies

NO RELATION


In [34]:
print()

You are a summarizing bot. Your job is to read parts of Wikipedia articles scraped from the internet and return a summary of that input in bullet point format. Do not worry about accessing the articles, I will give that to you. Your only responsibility is to summarize the input and return the summary in bullet point format. Let me know when you are ready to begin.


In [158]:
repeatbot = RepeatBot()
repeatbot(topic='dog', question='what kinds of skumlls do dogs have?')

The dog (Canis familiaris or Canis lupus familiaris) is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from the extinct Pleistocene wolf; the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans. Experts estimate that hunter-gatherers domesticated dogs more than 15,000 years ago, which was before the development of agriculture. Due to their long association with humans, dogs have expanded to a large number of domestic individuals and gained the ability to thrive on a starch-rich diet that would be inadequate for other canids.[4]

The dog has been selectively bred over millennia for various behaviors, sensory capabilities, and physical attributes.[5] Dog breeds vary widely in shape, size, and color. They perform many roles for humans, such as hunting, herding, pulling loads, protection, assisting police and the military, companionship, therapy, and aiding disabled people. Over the millennia, dogs b