## Necessary Imports for Brochure creator

In [7]:
import os
import json
import requests
from dotenv import load_dotenv
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

## Connecting to OpenAI (make sure you add OPENAI_API_KEY in .env file )

In [8]:
# Load environment variables in a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

In [9]:
MODEL="gpt-4o-mini"
openai = OpenAI()

# creating class WebCrawller that will scrap content from websites provided 

In [16]:
# Some websites need to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class WebCrawller:
    """
    A web crawler class to fetch and parse web page data, including the title, body content, 
    links, and text.

    Attributes:
        url (str): The URL of the web page.
        title (str): The title of the web page (if available).
        body (str): The raw HTML content of the page.
        links (List[str]): A list of all hyperlink URLs found on the page.
        text (str): The plain text content of the page body, excluding scripts, styles, and images.

    Methods:
        __init__(url): Initializes the crawler by fetching the web page and extracting its content.
    """
    
    def __init__(self, url: str):
        """
        Initializes the WebCrawller object, fetching the web page content and parsing its 
        title, body, plain text, and links.

        Args:
            url (str): The URL of the web page to crawl.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
        """
        self.url = url  # Store the provided URL

        # Send an HTTP GET request to the URL with headers
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
        self.body = response.content  # Save the raw HTML content of the page
        
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(self.body, 'html.parser')
        
        # Extract the page title, or use a default message if the title is not present
        self.title = soup.title.string if soup.title else "No title found"

        # Process the <body> content, removing irrelevant tags like scripts, styles, etc.
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()  # Remove the tag and its content
            # Get the plain text content from the body, separating lines by newline characters
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""  # If there's no <body>, set text to an empty string
        
        # Find all <a> tags and extract their href attributes
        links = [a.get('href') for a in soup.find_all('a')]
        # Filter out None values and keep only valid links
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n {self.title} \n Webpage Contents:\n{self.text}\n\n"

In [17]:
website = WebCrawller("https://www.doctustech.com/")

In [18]:
website.get_contents()

"Webpage Title:\n HCC coding services for risk adjustment - DoctusTech \n Webpage Contents:\nDoctus\nTech\nHCC CODING TOOLS\nHCC Coding App\nIntegrated Platform\nHCC INTEGRATED PLATFORM\nAthena Health\nWhy Partner With Us\nAbout Us\nDoctusTech VBC hub\nBlogs\nEBooks & White Papers\nVideos\nFact Sheets & Guides\nCase Studies\nCareers\nX\nEdit Content\nMENU\nHCC CODING TOOLS\nHCC Coding App\nIntegrated Platform\nHCC INTEGRATED PLATFORM\nAthena Health\nWhy Partner With Us\nAbout Us\nDoctusTech VBC hub\nBlogs\nEBooks & White Papers\nVideos\nFact Sheets & Guides\nCase Studies\nCareers\nSTART 14-DAY TRIAL\nSCHEDULE A DEMO\n×\nStart 14-Day HCC DOC Trial\nIf you are interested in a trial of our HCC Coding Mobile App, please share your information below and we will respond right away.\nWe make HCC coding education easy.\nDoctusTech’s expert HCC coding services offer the best way for clinicians to learn HCC coding.\n9 out of 10 doctors prefer DoctusTech to any other method of HCC training – find

In [19]:
website.links

['https://www.doctustech.com',
 '#',
 'https://www.doctustech.com/hcc-coding-app/',
 'https://www.doctustech.com/hcc-integrated-platform/',
 'https://www.doctustech.com/hcc-integrated-platform/',
 'https://www.doctustech.com/hcc-integration-athenahealth/',
 'https://www.doctustech.com/why-partner-with-us/',
 'https://www.doctustech.com/about-us/',
 'https://www.doctustech.com/doctustech-vbc-hub/',
 'https://www.doctustech.com/blog/',
 'https://www.doctustech.com/ebooks-white-papers/',
 'https://www.doctustech.com/videos/',
 'https://www.doctustech.com/fact-sheets-guides/',
 'https://www.doctustech.com/case-studies/',
 'https://www.doctustech.com/careers-at-doctustech/',
 '#',
 '#',
 '#',
 'https://www.doctustech.com/hcc-coding-app/',
 'https://www.doctustech.com/hcc-integrated-platform/',
 'https://www.doctustech.com/hcc-integrated-platform/',
 'https://www.doctustech.com/hcc-integration-athenahealth/',
 'https://www.doctustech.com/why-partner-with-us/',
 'https://www.doctustech.com/ab

In [20]:
system_prompt_links = "You are provided with a list of links found on webpage your are able to decide which of the links would  \
be most relevant to include in a brochure about the company  such as links to an About Page or Company page, or Careers/Job Pages \n"
system_prompt_links += "You should respond in JSON as in this example"
system_prompt_links += """
    {
        "links": [
            {"type": "About Page", "url": "https://google.com/about/"},
            {"type": "Careers Page", "url": "https://google.com/careers/"}
        ]
    }
"""

In [23]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website on {website.url}"
    user_prompt += "Please decide which of these are relevant web links for brochure about the company, response with the full https URL \
    do not include terms of service, privacy policy, emails links"
    user_prompt += "Links (some might be relative links)"
    user_prompt += "\n".join(website.links)
    return user_prompt
    

In [24]:
get_links_user_prompt(website)

'Here is the list of links on the website on https://www.doctustech.com/Please decide which of these are relevant web links for brochure about the company, response with the full https URL     do not include terms of service, privacy policy, emails linksLinks (some might be relative links)https://www.doctustech.com\n#\nhttps://www.doctustech.com/hcc-coding-app/\nhttps://www.doctustech.com/hcc-integrated-platform/\nhttps://www.doctustech.com/hcc-integrated-platform/\nhttps://www.doctustech.com/hcc-integration-athenahealth/\nhttps://www.doctustech.com/why-partner-with-us/\nhttps://www.doctustech.com/about-us/\nhttps://www.doctustech.com/doctustech-vbc-hub/\nhttps://www.doctustech.com/blog/\nhttps://www.doctustech.com/ebooks-white-papers/\nhttps://www.doctustech.com/videos/\nhttps://www.doctustech.com/fact-sheets-guides/\nhttps://www.doctustech.com/case-studies/\nhttps://www.doctustech.com/careers-at-doctustech/\n#\n#\n#\nhttps://www.doctustech.com/hcc-coding-app/\nhttps://www.doctustech.

In [26]:
def get_links(url):
    website = WebCrawller(url)
    completion = openai.chat.completions.create(
        model =  MODEL,
        messages = [
            {"role": "system", "content": system_prompt_links},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = completion.choices[0].message.content
    return json.loads(result)
    

In [28]:
get_links('https://www.doctustech.com')

{'links': [{'type': 'About Page',
   'url': 'https://www.doctustech.com/about-us/'},
  {'type': 'Careers Page',
   'url': 'https://www.doctustech.com/careers-at-doctustech/'},
  {'type': 'Why Partner With Us',
   'url': 'https://www.doctustech.com/why-partner-with-us/'},
  {'type': 'Blog', 'url': 'https://www.doctustech.com/blog/'},
  {'type': 'Case Studies', 'url': 'https://www.doctustech.com/case-studies/'},
  {'type': 'Fact Sheets and Guides',
   'url': 'https://www.doctustech.com/fact-sheets-guides/'},
  {'type': 'Ebooks and White Papers',
   'url': 'https://www.doctustech.com/ebooks-white-papers/'},
  {'type': 'Videos', 'url': 'https://www.doctustech.com/videos/'}]}

#  Now lets create Brochure

In [31]:
def get_all_details(url):
    result = "Landing page:\n"
    result += WebCrawller(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += WebCrawller(link["url"]).get_contents()
    return result

In [34]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

In [35]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [36]:
get_brochure_user_prompt("Doctustech", "https://doctustech.com")

Found links: {'links': [{'type': 'About Page', 'url': 'https://www.doctustech.com/about-us/'}, {'type': 'Careers Page', 'url': 'https://www.doctustech.com/careers-at-doctustech/'}, {'type': 'Why Partner with Us', 'url': 'https://www.doctustech.com/why-partner-with-us/'}, {'type': 'Blog', 'url': 'https://www.doctustech.com/blog/'}, {'type': 'Ebooks and White Papers', 'url': 'https://www.doctustech.com/ebooks-white-papers/'}, {'type': 'Videos', 'url': 'https://www.doctustech.com/videos/'}, {'type': 'Fact Sheets and Guides', 'url': 'https://www.doctustech.com/fact-sheets-guides/'}, {'type': 'Case Studies', 'url': 'https://www.doctustech.com/case-studies/'}]}


"You are looking at a company called: Doctustech\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\n HCC coding services for risk adjustment - DoctusTech \n Webpage Contents:\nDoctus\nTech\nHCC CODING TOOLS\nHCC Coding App\nIntegrated Platform\nHCC INTEGRATED PLATFORM\nAthena Health\nWhy Partner With Us\nAbout Us\nDoctusTech VBC hub\nBlogs\nEBooks & White Papers\nVideos\nFact Sheets & Guides\nCase Studies\nCareers\nX\nEdit Content\nMENU\nHCC CODING TOOLS\nHCC Coding App\nIntegrated Platform\nHCC INTEGRATED PLATFORM\nAthena Health\nWhy Partner With Us\nAbout Us\nDoctusTech VBC hub\nBlogs\nEBooks & White Papers\nVideos\nFact Sheets & Guides\nCase Studies\nCareers\nSTART 14-DAY TRIAL\nSCHEDULE A DEMO\n×\nStart 14-Day HCC DOC Trial\nIf you are interested in a trial of our HCC Coding Mobile App, please share your information below and we will respond right away.\nWe m

In [37]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [38]:
create_brochure("Doctustech", "https://doctustech.com")

Found links: {'links': [{'type': 'About Page', 'url': 'https://www.doctustech.com/about-us/'}, {'type': 'Careers Page', 'url': 'https://www.doctustech.com/careers-at-doctustech/'}, {'type': 'Partnership Information', 'url': 'https://www.doctustech.com/why-partner-with-us/'}, {'type': 'Blog', 'url': 'https://www.doctustech.com/blog/'}, {'type': 'Ebooks and White Papers', 'url': 'https://www.doctustech.com/ebooks-white-papers/'}, {'type': 'Videos', 'url': 'https://www.doctustech.com/videos/'}, {'type': 'Fact Sheets and Guides', 'url': 'https://www.doctustech.com/fact-sheets-guides/'}, {'type': 'Case Studies', 'url': 'https://www.doctustech.com/case-studies/'}]}


# DoctusTech Brochure

## Who We Are
DoctusTech is a pioneering health platform company dedicated to enhancing value-based care capabilities for providers, payers, healthcare systems, and accountable care organizations (ACOs). Our focus on HCC (Hierarchical Condition Category) coding services empowers clinicians and healthcare organizations to optimize their risk adjustment strategies through better education, effective workflows, and reliable data.

## Our Services
We provide a comprehensive suite of HCC coding tools designed to simplify and enhance coding practices:
- **HCC Coding App**: An intuitive mobile app that offers five-minute, app-based asynchronous learning to bridge knowledge gaps.
- **Integrated Platform**: Seamless integration with over 70 major EMRs, allowing you to aggregate payer, specialty, and hospital data.
- **Patient Diagnosis Assist Platform**: Automates chart review and ensures documentation integrity to increase efficiency and accuracy.

## Why Partner With Us?
At DoctusTech, we position ourselves as leaders in HCC coding education. With a remarkable 90% clinician engagement, our training methods are preferred over traditional approaches. We ensure higher documentation specificity and reduce dependency on coders, advancing profitability for Medicare risk contracts and enhancing overall healthcare delivery.

### Key Benefits
- 30% increase in RAF (Risk Adjustment Factor) accuracy.
- Proven strategies to close knowledge, workflow, and data gaps.

## Customer Testimonials
> "I like the way they ask the questions, because it's like I'm sitting for boards."  
> — **Dr. Villaplana-Canals, Florida**

> "This is more intuitive and is full of 'ah-ha moments'."  
> — **Dr. Bateman, Ohio**

> "The mobile app is wonderful...it gets them thinking."  
> — **Teresa Caniglia, CDI, Ohio**

## Company Culture
At DoctusTech, we foster an environment of innovation, inclusivity, and continuous learning. Our team is dedicated to empowering each other and our clients through collaboration and knowledge-sharing. We value a healthy work-life balance and strive to create an atmosphere where everyone's contributions are recognized and celebrated.

## Careers at DoctusTech
We are looking for passionate individuals committed to transforming healthcare through technology. Our career opportunities span a variety of disciplines including coding, software development, project management, and education. Join us in our mission to deliver best-in-class coding solutions and enhance patient care.

### Current Openings
- Software Developer
- HCC Trainer
- Project Manager

If you are driven to make a difference in the healthcare landscape, we invite you to explore career opportunities at DoctusTech.

## Get Started Today!
Discover how DoctusTech can enhance your healthcare journey.  
- **Start a 14-Day Trial**: Experience the benefits of our HCC Coding App firsthand.  
- **Schedule a Demo**: Let us show you how our solutions can meet your specific needs.

For more information, visit our website or contact us.

--- 
**Explore the Future of Value-Based Care with DoctusTech.**  
**[www.doctustech.com](http://www.doctustech.com)**  

In [39]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [41]:
stream_brochure("invisible", "https://www.invisible.co/")

Found links: {'links': [{'type': 'About Page', 'url': 'https://www.invisible.co/about'}, {'type': 'Careers Page', 'url': 'https://www.invisible.co/join-us'}, {'type': 'Technology Careers', 'url': 'https://www.invisible.co/technology-careers'}, {'type': 'Operations Careers', 'url': 'https://www.invisible.co/operations-careers'}, {'type': 'Case Studies', 'url': 'https://www.invisible.co/case-studies'}, {'type': 'Industries - AI', 'url': 'https://www.invisible.co/industries/ai'}, {'type': 'Industries - Financial Services', 'url': 'https://www.invisible.co/industries/financial-services'}, {'type': 'Industries - Healthcare', 'url': 'https://www.invisible.co/industries/healthcare'}, {'type': 'Industries - Insurance', 'url': 'https://www.invisible.co/industries/insurance'}, {'type': 'Industries - Retail', 'url': 'https://www.invisible.co/industries/retail'}, {'type': 'Industries - Technology', 'url': 'https://www.invisible.co/industries/technology'}, {'type': 'Solutions - Intelligent Operatio

# Invisible Technologies Brochure

## About Us
Invisible Technologies is a premier provider of intelligent operations, seamlessly integrating cutting-edge AI with elite human expertise to streamline complex processes for organizations across various industries. Ranked 61st on the 2024 Deloitte Technology Fast 500 and recognized as the #3 Fastest Growing AI Company by Inc. 5000 in 2024, we are trusted by 80% of the world’s leading AI models to solve enterprise problems effectively.

## Our Solutions
### Intelligent Operations
We offer end-to-end process automation solutions that combine AI, automation, and human expertise, enabling organizations to concentrate on their core products and services. Our agents—30% of whom hold Masters or PhDs—are skilled in over 30 coding languages and frameworks, ensuring that we can tackle any enterprise challenge.

### AI Training & Enablement
With our deep AI experience, companies can launch their tools sooner and focus on developing features that matter most to their customers. Whether refining small models or training large ones, we provide the expertise necessary for success.

## Industries We Serve
We cater to a multitude of industries, including:

- **Artificial Intelligence**
- **Financial Services**
- **Healthcare**
- **Insurance**
- **Retail**
- **Technology**

Our tailored solutions have saved clients millions of hours and significantly reduced operational costs.

## Customer Success Stories
- **Retail**: Streamlined recruitment for a retailer with 700 locations, leading to faster candidate selection.
- **Healthcare**: Optimized claims processing for Headway, achieving processing speeds that are eight times faster.
- **Finance**: Supported Nasdaq by saving 10,000 developer hours through efficient integration solutions.

## Company Culture
At Invisible, we believe in innovation, collaboration, and the power of diverse perspectives. Our team consists of professionals from over 40 cultures and languages, creating a rich tapestry of ideas that drive our solutions forward. We place a strong emphasis on continuous learning, work-life balance, and fostering an inclusive environment where every voice matters.

## Careers at Invisible
Join our team of experts dedicated to transforming enterprise operations! We are always looking for talented individuals in both technology and operations roles. Careers at Invisible provide the opportunity to work on groundbreaking projects and collaborate with a diverse team of professionals. 

- **Technology Careers**: Innovate with leading-edge AI technologies.
- **Operations Careers**: Help clients optimize their processes and drive business success.

If you're ready to be part of a dynamic team that is reshaping the future of business operations, [apply now](#) to see where you fit in at Invisible Technologies.

## Connect with Us
Ready to revolutionize your operations? [Request a demo](#) or speak to our team today! Unlock your strategic vision with Invisible Technologies—where people, technology, and AI expertise converge.

---

### Contact Information
[Website](https://invisible.com)  
[Email us](mailto:info@invisible.com)

Let’s innovate how you operate!