In [2]:
import os
import requests
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr # oh yeah!

In [3]:
# Read settings via python-dotenv, then look the API key up in the environment.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')

# Report the key's presence; only its first 8 characters are ever printed.
if not openai_api_key:
    print("OpenAI API Key not set")
else:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")

# Client object used by the streaming helper later in the notebook.
openai = OpenAI()

OpenAI API Key exists and begins sk-proj-


## Create Scraper Website Class

https://uai.ac.id/en/home/

https://uai.ac.id/en/about-us/

https://uai.ac.id/en/academics/

https://penerimaan.uai.ac.id/

In [4]:
class Website:
    """Fetch one or more web pages and expose their text for use in a prompt.

    Every page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The result for each page is appended to ``self.webpages`` as a dict with
    the keys ``url``, ``title``, ``text`` and ``social_links``.
    """

    # Substrings that mark an ``href`` as a social media link.
    SOCIAL_MEDIA_SITES = ("twitter", "facebook", "instagram", "linkedin", "youtube")
    # Tags removed from the document before its text is extracted.
    IRRELEVANT_TAGS = ["script", "style", "img", "input"]

    def __init__(self, urls, timeout=30):
        """Download and parse every given URL immediately.

        Args:
            urls: A single URL string or a list of URL strings. Surrounding
                whitespace is stripped and blank entries are ignored.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                for each page, so a stalled server cannot hang the caller.

        Raises:
            requests.exceptions.RequestException: propagated unchanged when a
                page cannot be fetched.
        """
        candidates = urls if isinstance(urls, list) else [urls]
        # Blank lines from a multi-line textbox would otherwise reach
        # requests.get("") and fail with MissingSchema.
        self.urls = [url.strip() for url in candidates if url and url.strip()]
        self.timeout = timeout
        self.webpages = []
        for url in self.urls:
            self.process_url(url)

    def process_url(self, url):
        """Fetch ``url`` and append its title, text and social links to ``self.webpages``."""
        response = requests.get(url, timeout=self.timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None for an empty <title> or one containing nested
        # tags; fall back to the same placeholder used when <title> is absent.
        title_text = soup.title.string if soup.title else None
        title = title_text or "No title found"

        # Fragments and some error pages have no <body>; use the whole
        # document in that case instead of failing on ``None``.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root.find_all(self.IRRELEVANT_TAGS):
            irrelevant.decompose()
        text = root.get_text(separator="\n", strip=True)

        self.webpages.append({
            "url": url,
            "title": title,
            "text": text,
            "social_links": self.get_social_media_links(soup),
        })

    def get_contents(self) -> str:
        """Return all fetched pages rendered as one plain-text block.

        Each page contributes its URL, title and text, followed by a
        "Social Media Links" list when any were found, then a blank line.
        """
        parts = []
        for page in self.webpages:
            parts.append(f"URL: {page['url']}\n")
            parts.append(f"Webpage Title: {page['title']}\n")
            parts.append(f"Webpage Contents:\n{page['text']}\n\n")

            # Add social media links if available
            if page['social_links']:
                parts.append("Social Media Links:\n")
                parts.extend(f"- {link}\n" for link in page['social_links'])
            parts.append("\n")
        return "".join(parts)

    def get_social_media_links(self, soup) -> List[str]:
        """Collect hrefs of ``<a>`` tags that mention a known social media site.

        Matching is case-insensitive, but each link is returned exactly as
        written in the page because URL paths are case-sensitive. Repeated
        links (e.g. header and footer) are reported once, in first-seen order.
        """
        social_links = []
        seen = set()
        for link in soup.find_all("a", href=True):
            href = link['href']
            if href in seen:
                continue
            lowered = href.lower()
            if any(site in lowered for site in self.SOCIAL_MEDIA_SITES):
                seen.add(href)
                social_links.append(href)
        return social_links

In [5]:
# Example page URLs (the same four listed in the markdown above).
# NOTE(review): no code visible in this notebook reads this module-level list;
# `stream_brochure` takes its own `urls` parameter — confirm whether this
# variable is still needed.
urls = [
    "https://uai.ac.id/en/home/",
    "https://uai.ac.id/en/about-us/",
    "https://uai.ac.id/en/academics/",
    "https://penerimaan.uai.ac.id/",
]

## Stream GPT

In [12]:
# System prompt sent with every request: asks the model to turn a university
# or college landing page into a markdown brochure.
system_message = (
    "You are an assistant that analyzes the contents of a university or college website landing page "
    "and creates a concise, informative, and visually engaging brochure. The brochure should be tailored for "
    "prospective students, academic partners, and potential investors. Highlight the university's academic strengths, "
    "vision and mission, key faculties or departments, student life, accreditation, achievements, and global collaborations. "
    "Include admissions information, contact details, and direct links to apply or learn more. Respond in markdown format, "
    "suitable for web and print adaptation."
)

def stream_gpt(prompt, model='gpt-4-turbo', client=None):
    """Stream a chat completion and yield the text accumulated so far.

    Args:
        prompt: User message content sent after ``system_message``.
        model: Chat model name passed to the API.
        client: Object exposing ``chat.completions.create``; defaults to the
            notebook-level ``openai`` client when omitted.

    Yields:
        The full response text received up to and including each chunk, so a
        consumer can simply replace its display with every yielded value.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]
    api = client if client is not None else openai
    stream = api.chat.completions.create(
        model=model,
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        # A chunk may arrive with an empty ``choices`` list (for example a
        # usage-only chunk); skip it instead of indexing into nothing.
        if not chunk.choices:
            continue
        # ``delta.content`` is None on chunks that carry no text.
        result += chunk.choices[0].delta.content or ""
        yield result

## Stream Brochure

In [13]:
def stream_brochure(company_name, urls):
    """Stream a markdown brochure for ``company_name`` built from the given pages.

    Args:
        company_name: Name inserted into the prompt.
        urls: Newline-separated URLs (the text of the "URL List" box). Blank
            lines and surrounding whitespace are ignored.

    Yields:
        The brochure text accumulated so far after each streamed chunk, or a
        single hint message when no usable URL was supplied.
    """
    # splitlines() also copes with "\r\n"; dropping blanks avoids passing an
    # empty string to the scraper, which would fail on the HTTP request.
    url_list = [line.strip() for line in (urls or "").splitlines() if line.strip()]
    if not url_list:
        yield "Please add at least one URL before generating a brochure."
        return

    prompt = (
        f"Please generate a professional and visually appealing company brochure for {company_name}. "
        "The brochure should highlight the company's vision, services/products, key achievements, and contact information. "
        "Make sure to include design elements that match the company's branding. "
        "Also, clearly feature the following company and admission links. "
        "The brochure should be suitable for both print and digital distribution. "
    )
    prompt += Website(url_list).get_contents()

    yield from stream_gpt(prompt)

## UI

In [14]:
def add_url(current_urls, new_url):
    """Append ``new_url`` to the newline-separated ``current_urls`` text.

    Args:
        current_urls: Existing URL list text (may be empty).
        new_url: URL typed by the user; surrounding whitespace is removed and
            ``https://`` is prepended when no http/https scheme is present.

    Returns:
        A ``(updated_urls, "")`` tuple; the empty string is the new value for
        the URL entry box. ``current_urls`` is returned unchanged when
        ``new_url`` is blank.
    """
    # Normalise once so the blank check, the scheme check and the stored
    # value all see the same text.
    new_url = (new_url or "").strip()
    if not new_url:
        return current_urls, ""

    # Schemes are case-insensitive, so "HTTPS://..." must not get a second prefix.
    if not new_url.lower().startswith(("http://", "https://")):
        new_url = "https://" + new_url

    if not current_urls:
        return new_url, ""

    separator = "" if current_urls.endswith('\n') else "\n"
    return current_urls + separator + new_url, ""

# Build the Gradio UI. Components are created top to bottom inside the Blocks
# context: a name box, a URL entry row with an "Add" button, the editable URL
# list, the "Generate Brochure" button and a Markdown area for the result.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown("## Company Brochure Generator")
    
    with gr.Row():
        # Passed to stream_brochure as its first argument.
        company_name = gr.Textbox(
            label="Company Name", 
            placeholder="Enter company name",
            show_label=True,
            scale=1
        )
    
    gr.Markdown("### URLs")
    
    with gr.Row():
        # Single-URL entry box; add_url appends its value to the list below.
        new_url_input = gr.Textbox(
            label="", 
            placeholder="Enter URL (e.g., example.com)",
            show_label=False,
            scale=4
        )
        add_button = gr.Button("Add", size="md", scale=1, variant="primary")
    
    # Newline-separated URL list; it can also be edited by hand.
    urls_list = gr.Textbox(
        label="URL List", 
        placeholder="URLs will appear here or add here with new line",
        lines=3,
        show_label=True
    )
    
    generate_button = gr.Button("Generate Brochure", size="md", variant="primary")
    
    # Receives the markdown text yielded by stream_brochure.
    output = gr.Markdown(label="Generated Brochure")
    
    # Set up events
    # add_url returns (updated list text, ""), so the second output clears
    # the entry box after each click.
    add_button.click(
        fn=add_url, 
        inputs=[urls_list, new_url_input], 
        outputs=[urls_list, new_url_input]
    )
    
    # stream_brochure is a generator, so the output is expected to refresh
    # with each value it yields while the response streams in.
    generate_button.click(
        fn=stream_brochure, 
        inputs=[company_name, urls_list], 
        outputs=output,
        scroll_to_output=True,
        show_progress="minimal"
    )

# Launch the app with an explicit height and width.
# NOTE(review): per Gradio's launch() docs these size the inline iframe —
# confirm for the installed Gradio version.
app.launch(height=450, width="80%")

* Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.


