# 📊 Company Analysis with GenAI

This notebook performs a structured analysis of companies using Generative AI.
The goal is to collect and process relevant information, then feed it into an OpenAI GPT-4o model (`gpt-4o-mini` in the code below) for deeper insights.

## 🔍 Objective

We will:
- Extract company details
- Process and structure the information
- Use GPT-4o to analyze the information and summarize the insights

## 📥 Data Collection

The notebook scrapes a company's LinkedIn page, asks the model which of the links found there are relevant, fetches those pages as well, and combines everything into a single structured text for further analysis.

## 🛠️ Data Processing

The collected data is cleaned and formatted to ensure compatibility with GPT-4o.

## 🤖 AI-Powered Analysis

GPT-4o is used to analyze the structured company data and generate insights from it.

## 📈 Results and Interpretation

The AI-generated insights are reviewed and interpreted for decision-making.

## 📌 Conclusion

This process enables a structured, AI-driven approach to company analysis, providing valuable insights for business intelligence and decision-making.

In [None]:
# Import necessary libraries
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [None]:
# Load environment variables, sanity-check the API key, and set up the model and client

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Only the key's shape (prefix and length) is checked here; nothing is sent to the API.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

MODEL = 'gpt-4o-mini'
openai = OpenAI()

In [None]:
# A class to represent a Webpage

# Browser-like request headers: some websites need proper headers before they serve a page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A scraped web page: its title, its visible text, and the links it contains.

    Creating an instance performs the HTTP request and parses the response, so
    construction can raise whatever ``requests.get`` raises.

    Attributes set by ``__init__``:
        url:   the URL that was requested.
        body:  the raw response bytes.
        title: the page title, or "No title found" when there is none.
        text:  the text of <body> with script, style, img and input elements
               removed; "" when the page has no <body>.
        links: every non-empty ``href`` of the page's <a> tags, exactly as
               written in the HTML (so they may be relative).
    """

    def __init__(self, url, timeout=30):
        """
        Fetch and parse ``url``.

        Args:
            url: address of the page to download.
            timeout: seconds passed to ``requests.get`` so that an unresponsive
                server cannot block the notebook indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``.string`` is None for an empty <title> or one with nested tags, so
        # fall back to the placeholder in that case too, not only when the tag is absent.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that carry no readable content before extracting the text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [None]:
# Scrape the Fair Supply LinkedIn company page and print its title and text.
fair_supply = Website("https://www.linkedin.com/company/fairsupply-com-au-pty-limited/")
print(fair_supply.get_contents())

In [None]:
# Show the links collected from the scraped page (displayed as the cell's output).
fair_supply.links


In [None]:
# System prompt for the link-selection call: the instructions followed by a
# literal JSON example showing the expected response shape.
link_system_prompt = (
    "You are provided with a list of links found on a LinkedIn page. "
    "Decide which of these links are most relevant for a thorough company analysis—"
    "for example, a company profile, job postings, or employee profiles. "
    "Exclude Terms of Service, Privacy policy, or email links. "
    "You should respond in JSON as in this example:"
    "\n{\n"
    '    "links": [\n'
    '        {"type": "company profile", "url": "https://www.linkedin.com/company/example-company"},\n'
    '        {"type": "job posting", "url": "https://www.linkedin.com/jobs/view/123456/"},\n'
    '        {"type": "employee profile", "url": "https://www.linkedin.com/in/john-doe"}\n'
    "    ]\n"
    "}\n"
)



In [None]:
# Inspect the assembled system prompt used for link selection.
print(link_system_prompt)

In [None]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick the relevant links.

    The message consists of fixed instructions mentioning ``website.url``,
    followed by ``website.links`` listed one per line.
    """
    instruction_parts = [
        f"Here is the list of links on the website of {website.url}. ",
        "Please decide which of these are relevant for a thorough company analysis. ",
        "Specifically, we are looking for pages or resources that help assess the company's background, mission, ",
        "values, leadership, culture, finances, or job opportunities. ",
        "Provide your response in JSON format, listing the full https URLs. ",
        "Do not include Terms of Service, Privacy policy, or email links.\n",
        "Links (some might be relative links):\n",
    ]
    link_lines = "\n".join(website.links)
    return "".join(instruction_parts) + link_lines


In [None]:
# Inspect the user prompt built from the links of the scraped Fair Supply page.
print(get_links_user_prompt(fair_supply))

In [None]:
def get_links(url):
    """
    Scrape ``url`` and ask the model which of its links matter for a company analysis.

    The request asks for a JSON object response; the reply text is parsed with
    ``json.loads`` and the resulting Python object is returned.
    """
    page = Website(url)
    messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=messages,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [None]:
# Ask the model which links on the Fair Supply LinkedIn page are relevant
# (the parsed JSON reply is displayed as the cell's output).
get_links("https://www.linkedin.com/company/fairsupply-com-au-pty-limited/")

In [None]:
def get_all_details(url):
    """
    Collect the text of ``url`` and of every page the model judged relevant.

    The landing page is scraped first, then ``get_links`` selects follow-up
    links and each of them is scraped in turn. A link that has no URL or that
    cannot be fetched is reported and skipped, so a single bad link does not
    discard the rest of the collected material.

    Args:
        url: address of the landing page to start from.

    Returns:
        One string: the landing page contents followed by a titled section
        for each linked page that could be fetched.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)

    # The model's JSON is not guaranteed to match the requested shape, so read
    # it defensively instead of indexing keys that may be absent.
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            print("Skipping link without a URL:", link)
            continue
        try:
            page = Website(target)
        except requests.RequestException as error:
            # Covers unreachable hosts, timeouts and malformed or relative URLs.
            print(f"Skipping {target}: {error}")
            continue
        sections.append(f"\n\n{link.get('type', 'linked page')}\n")
        sections.append(page.get_contents())

    return "".join(sections)

In [None]:
# Print the combined contents of the page and of the linked pages the model selected.
# Note that this run starts from the company's /people/ page.
print(get_all_details("https://www.linkedin.com/company/fairsupply-com-au-pty-limited/people/"))

In [None]:
# System prompt for the final analysis call; the pieces concatenate into a single line of text.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website, "
    "focusing on a thorough company analysis. Provide a concise overview of the company’s background, mission, "
    "values, leadership, workplace culture, and job opportunities. Respond in Markdown format. "
    "Include any details that would be helpful for a prospective candidate evaluating the company, "
    "such as career growth, employee development, or insights into day-to-day life, if available."
)


In [None]:
def get_company_analysis_user_prompt(company_name, url):
    """
    Build the user message for the company analysis request.

    The message names the company, states the task, and appends everything
    gathered by ``get_all_details(url)``. The result is cut to at most
    20,000 characters.
    """
    intro = f"You are looking at a company called: {company_name}\n"
    task = (
        "Here are the contents of its linkedin page and other relevant pages. "
        "Use this information to create a concise overview of the company that focuses on a thorough company analysis. "
        "Highlight the company's mission, values, leadership, culture, and job opportunities—"
        "any insights relevant for someone evaluating this company as a potential employer. "
        "Respond in Markdown.\n"
    )
    full_prompt = "".join([intro, task, get_all_details(url)])
    # Keep only the first 20,000 characters of the assembled prompt.
    return full_prompt[:20_000]


In [None]:
# Build the full analysis prompt for Fair Supply (the returned string is displayed as the cell's output).
get_company_analysis_user_prompt("Fair Supply", "https://www.linkedin.com/company/fairsupply-com-au-pty-limited/")

In [None]:
def create_company_analysis(company_name, url):
    """
    Stream a Markdown company analysis into the notebook output.

    The user prompt is built from the scraped pages, the completion is
    requested with ``stream=True``, and the displayed Markdown is refreshed as
    each chunk arrives.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page from which the company material is gathered.

    Returns:
        None. The analysis is only rendered through the notebook display.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_company_analysis_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    raw_text = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Guard against a chunk that carries no choices; there is nothing to render from it.
        if not chunk.choices:
            continue
        raw_text += chunk.choices[0].delta.content or ''
        # Clean a copy for display and keep the raw text intact: a fence and its
        # "markdown" tag may arrive in different chunks, and only the tag attached
        # to a fence should go, not the word "markdown" inside the analysis.
        rendered = raw_text.replace("```markdown", "").replace("```", "")
        update_display(Markdown(rendered), display_id=display_handle.display_id)

In [None]:
# Stream an analysis of Nebula.io, starting from its LinkedIn company page.
create_company_analysis("Nebula.io", "https://www.linkedin.com/company/nebula-io/")