# Project: Clipping Scraper

## Purpose
[Describe the purpose and goal of your clipping scraper project]

## Plan
[Outline your approach and methodology]

## Next Steps
[List the planned next steps and milestones]

## Technologies Used
[Key technologies and libraries being utilized]

In [3]:
# Modules necessary to run the project
# Operating system interface for file system operations and path handling
import os
# HTTP library for making web requests and handling responses
import requests
# Load environment variables from .env file for configuration management
from dotenv import load_dotenv
# HTML/XML parsing library for web scraping and data extraction
from bs4 import BeautifulSoup
# Jupyter notebook display utilities for formatted content rendering
from IPython.display import Markdown, display
# OpenAI API client for AI model interactions and completions
from openai import OpenAI

In [4]:
# Website class (and its request headers) copied from the course material

# Request headers sent with every page fetch. The User-Agent is a desktop
# Chrome string, split across lines only for readability.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A fetched web page reduced to its title and visible text.

    Attributes set by the constructor:
        url:   the address that was requested
        title: the page's <title> text, or "No title found"
        text:  the page text with script/style/img/input elements removed,
               one text fragment per line
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        url:     address of the page to download
        timeout: seconds passed to requests.get as its timeout; without one
                 a stalled server would block the notebook cell indefinitely

        Raises whatever requests.get raises (connection errors, timeouts).
        The HTTP status code is not checked, so error pages are parsed too.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or contains nested markup,
        # so fall back in that case as well as when the tag is missing.
        title = soup.title.string if soup.title else None
        self.title = title or "No title found"

        # html.parser does not synthesise a <body>; for fragments or
        # non-HTML responses soup.body is None, so use the whole document.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [6]:
# This cell implements user_prompt_for, which receives a website and an
# instruction and returns the complete user prompt, and messages_for, which
# wraps that prompt and the system prompt into the chat message list.

def user_prompt_for(website,user_prompt):
    """
    Build the complete user prompt for one website.

    website:     object exposing .title and .text (e.g. a Website instance)
    user_prompt: instruction text placed between the title line and the
                 page text

    Returns a single string: the title sentence, the instruction, then the
    page text.
    """
    final_prompt = f"You are looking at a website titled {website.title}"
    final_prompt += user_prompt
    # Keep the instruction and the scraped text apart; without a separator
    # the last word of the instruction fuses with the first word of the page.
    if not user_prompt.endswith("\n"):
        final_prompt += "\n\n"
    final_prompt += website.text
    return final_prompt

def messages_for(website, user_prompt,system_prompt):
    """
    Assemble the chat message list for one website.

    Returns a two-element list: a "system" message carrying system_prompt,
    followed by a "user" message whose content is built by user_prompt_for
    from the website and user_prompt.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_content = user_prompt_for(website, user_prompt)
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

In [None]:
## Here we connect with the local LLM API using openai library
# The client targets an OpenAI-compatible endpoint on localhost:11434
# (presumably a local Ollama server, given the placeholder api_key).

openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# Set the message structure for interacting with the LLM
# System is the role assigned to the model
# User is the role for the input message
#
# The prompts use parenthesised adjacent string literals rather than
# backslash continuations inside one literal: a backslash-newline joins the
# lines with nothing in between, which previously produced "aclipping" and
# "innovation,financial".
system_prompt = (
    "You are a semiconductor industry junior analyst in charge of making a "
    "clipping of the news on the industry. Ignore text that might be navigation related. "
    "Respond in markdown format with clear section headers."
)

user_prompt = (
    "\n The contents of this website is as follows; "
    "please provide a short summary of this website in markdown format. "
    "If it includes news or announcements, then categorize in tools, innovation, "
    "financial and market sections and summarize these too in markdown format"
)

# Download and parse the page to be summarised
semi = Website("https://semiconductor-today.com/")

# Send the message to the LLM and get the response
messages = messages_for(semi, user_prompt, system_prompt)
response = openai.chat.completions.create(
    model="llama3.2",
    messages=messages
)

# Render the first completion as Markdown in the notebook output
result = response.choices[0].message.content
display(Markdown(result))


