# Sports Website Scraper Aggregator using Ollama-based LLM

In [57]:
# imports
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from IPython.display import Markdown, display
from openai import OpenAI
import time


In [None]:
# Initialize the OpenAI client
ollama = OpenAI(
    # Port 11434 on localhost; presumably a locally running Ollama server
    # exposing an OpenAI-compatible API (confirm against your setup).
    base_url="http://localhost:11434/v1",
    # The client requires some key value; "ollama" looks like a placeholder.
    api_key="ollama",
)

In [61]:
# Define Website class

class Website:
    """A web page fetched with headless Chrome and reduced to title + text.

    Instances expose three attributes set by ``__init__``:

    * ``url``   - the address that was requested
    * ``title`` - the page's ``<title>`` string, or "No title found"
    * ``text``  - the text of ``<body>`` with script/style/img/input removed
    """

    def __init__(self, url, wait_seconds=5):
        """
        Create this Website object from the given url using Selenium + BeautifulSoup
        Supports JavaScript and normal websites uniformly.

        Args:
            url: Address of the page to load.
            wait_seconds: How long to pause after navigation before reading
                the page source, giving the page time to finish loading.
                Defaults to 5, the value previously hard-coded.

        Raises:
            Whatever Selenium / webdriver-manager raise if the driver cannot
            be installed, started, or navigated. The browser is always shut
            down before such an error propagates.
        """
        self.url = url

        # Configure headless Chrome
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        # Use webdriver-manager to manage ChromeDriver
        service = Service(ChromeDriverManager().install())

        # Initialize the Chrome WebDriver with the service and options
        driver = webdriver.Chrome(service=service, options=options)

        # try/finally guarantees the browser process is terminated even when
        # navigation or source retrieval fails; otherwise it would be leaked.
        try:
            driver.get(url)

            # Wait for Webpage to load
            time.sleep(wait_seconds)

            # Fetch the page source
            page_source = driver.page_source
        finally:
            driver.quit()

        # Parse the HTML content with Beautiful Soup
        soup = BeautifulSoup(page_source, 'html.parser')

        # Extract title. ``.string`` may be None (e.g. an empty <title>), so
        # fall back to the placeholder in that case as well as when the tag
        # is missing entirely.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        body = soup.body
        if body is None:
            # No <body> in the parsed document: nothing to extract.
            self.text = ""
        else:
            # Remove unnecessary elements
            for irrelevant in body(["script", "style", "img", "input"]):
                irrelevant.decompose()

            # Extract the main text
            self.text = body.get_text(separator='\n', strip=True)

In [62]:
# Define System Prompt
# System message sent with every request: sets the assistant persona and asks
# for markdown output. Adjacent literals are concatenated into one string.
system_prompt = (
    "You are an enthusiastic sports fanatic assistant that analyzes the contents of a sports website "
    "and provides a list of all of the sports news, ignoring text that might be navigation related. "
    "Respond in markdown."
)

# Define User Prompt function
def user_prompt_for(website):
    """Build the user message for a scraped page.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of a line naming the page title, the fixed request
        for a per-sport news list in markdown, and then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    request = (
        "\nThe contents of this website is as follows; "
        "please provide a list of all of the sports news organized by sport on the website in markdown. \n\n"
    )
    return heading + request + website.text

# Define message function
def message_for(website):
    """Return the two-message chat payload for ``website``.

    The list holds a system message carrying ``system_prompt`` followed by a
    user message produced by ``user_prompt_for(website)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]


In [None]:

# Define Summarize function
def summarize(url, model="llama3.2"):
    """Scrape ``url`` and return the model's reply for it.

    Args:
        url: Address of the page to load and summarize.
        model: Name of the model to request from the ``ollama`` client.
            Defaults to "llama3.2", the value previously hard-coded, so
            existing ``summarize(url)`` calls behave as before.

    Returns:
        The ``content`` of the first choice's message in the chat
        completion response.
    """
    website = Website(url)
    response = ollama.chat.completions.create(
        model=model,
        messages=message_for(website),
    )
    return response.choices[0].message.content

In [None]:
# Display summary of website
def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown."""
    display(Markdown(summarize(url)))

In [None]:
# Run the full pipeline against the ESPN front page.
display_summary(url="https://www.espn.com/")