In [7]:
import os
import sys
import json
from dotenv import load_dotenv
from openai import OpenAI

current_dir = os.getcwd()
if os.path.basename(current_dir) == 'notebooks':
    project_dir = os.path.dirname(current_dir)
else:
    project_dir = current_dir

project_dir = os.path.abspath(project_dir)
if project_dir not in sys.path:
    sys.path.insert(0, project_dir)

from utils.scraper import fetch_website_contents, fetch_website_links

In [11]:
load_dotenv(override=True)
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    print("No API key was found")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end")
else:
    print("API key found!")

API key found!


In [13]:
MODEL = "gemini-2.5-flash"
gemini = OpenAI(api_key=api_key,
                base_url="https://generativelanguage.googleapis.com/v1beta/openai/")

In [17]:
links = fetch_website_links("https://www.google.com")
links

['https://mail.google.com/mail/&ogbl',
 'https://www.google.com/imghp?hl=ar&ogbl',
 'https://www.google.com.eg/intl/ar/about/products',
 'https://accounts.google.com/ServiceLogin?hl=ar&passive=true&continue=https://www.google.com/&ec=futura_exp_og_so_72776762_e',
 'https://www.google.com/setprefs?sig=0_8-9XCtEgizvW1Er36g7X8-svMvk%3D&hl=en&source=homepage&sa=X&ved=0ahUKEwi6huumsJWRAxW6hP0HHR33DKEQ2ZgBCBY',
 'https://about.google/?utm_source=google-EG&utm_medium=referral&utm_campaign=hp-footer&fg=1',
 'https://www.google.com/intl/ar_eg/ads/?subid=ww-ww-et-g-awa-a-g_hpafoot1_1!o2&utm_source=google.com&utm_medium=referral&utm_campaign=google_hpafooter&fg=1',
 'https://www.google.com/services/?subid=ww-ww-et-g-awa-a-g_hpbfoot1_1!o2&utm_source=google.com&utm_medium=referral&utm_campaign=google_hpbfooter&fg=1',
 'https://google.com/search/howsearchworks/?fg=1',
 'https://sustainability.google/intl/ar/carbon-free/?utm_source=googlehpfooter&utm_medium=housepromos&utm_campaign=bottom-footer&utm_

In [37]:
link_system_prompt = """
You are a company marketing intelligence extractor.

Your job is to analyze a list of website URLs and return every link that is useful for:
- Branding / Brochure design / Company presentation
- Product & solution overview
- Business positioning, strategy, clients & value
- Careers & hiring
- Investor & corporate-level communication
- Press, PR, news, media, events
- Sustainability/ESG/initiatives/vision
- Customer success stories or case studies
- Contact & business onboarding pathways

You must extract **every possibly relevant link**, not just the obvious ones.

Answer STRICTLY in JSON:

{
    "links":[
        {
            "url":"full url here",
            "type":"category label",
            "specific_type":"specific type of link",
            "importance_score": 1-100,
            "why_useful": "short marketing benefit reason"
        }
    ]
}

RELEVANT CATEGORIES YOU SHOULD CONSIDER:
• About / Company / Mission / Vision
• Products / Services / Solutions
• Pricing & plans (if available)
• Enterprise packages or cloud offerings
• Advertising & business solutions
• Developers platform / API ecosystem
• Careers
• Press / Blog / Media / News
• Investors / Corporate governance
• Contact / Support channels
• Sustainability & ESG initiatives
• Partnerships / Affiliations / Research programs
• Case studies / Testimonials / Portfolio

DO NOT include:
• Login/account/dashboard
• TOS / Privacy / Cookies / Legal
• Mailto links
• Useless navigation endpoints

Return everything valuable for marketing, not minimal results.
"""


In [32]:
def get_links_user_prompt(url):
    links = fetch_website_links(url)

    user_prompt = f"""
Extract **all marketing-valuable links** from the website:

URL: {url}

You are given raw links below — your job is to identify every link that can be useful in marketing,
branding, brochures, business presentations, product communication, investor material, hiring,
or public-facing messaging.

Raw links discovered from site:

{"\n".join(links)}

Return output **strictly in JSON** only — NO commentary, NO explanation, NO Markdown.
"""

    return user_prompt


In [33]:
print(get_links_user_prompt("https://www.google.com"))


Extract **all marketing-valuable links** from the website:

URL: https://www.google.com

You are given raw links below — your job is to identify every link that can be useful in marketing,
branding, brochures, business presentations, product communication, investor material, hiring,
or public-facing messaging.

Raw links discovered from site:

https://mail.google.com/mail/&ogbl
https://www.google.com/imghp?hl=ar&ogbl
https://www.google.com.eg/intl/ar/about/products
https://accounts.google.com/ServiceLogin?hl=ar&passive=true&continue=https://www.google.com/&ec=futura_exp_og_so_72776762_e
https://www.google.com/setprefs?sig=0_HXamtm1Ogn2IJb7KzrRJSOSOz_I%3D&hl=en&source=homepage&sa=X&ved=0ahUKEwjM6MrHwpWRAxVshv0HHSE7CJMQ2ZgBCBY
https://about.google/?utm_source=google-EG&utm_medium=referral&utm_campaign=hp-footer&fg=1
https://www.google.com/intl/ar_eg/ads/?subid=ww-ww-et-g-awa-a-g_hpafoot1_1!o2&utm_source=google.com&utm_medium=referral&utm_campaign=google_hpafooter&fg=1
https://www.google

In [38]:
def select_relevant_links(url):
    ress = gemini.chat.completions.create(
        model=MODEL,
        messages=[
        {
            "role": "system",
            "content": link_system_prompt
        },
        {
            "role": "user",
            "content": get_links_user_prompt(url)
        }
        ],
        response_format={"type": "json_object"}
    )
    return json.loads(ress.choices[0].message.content)

In [39]:
select_relevant_links("https://www.google.com")

{'links': [{'url': 'https://www.google.com/imghp?hl=ar&ogbl',
   'type': 'Products / Services / Solutions',
   'specific_type': 'Product & solution overview',
   'importance_score': 75,
   'why_useful': 'Showcases a core Google product for branding and solution overview.'},
  {'url': 'https://www.google.com.eg/intl/ar/about/products',
   'type': 'Products / Services / Solutions',
   'specific_type': 'Product & solution overview',
   'importance_score': 90,
   'why_useful': "Provides a comprehensive overview of Google's various product offerings."},
  {'url': 'https://about.google/?utm_source=google-EG&utm_medium=referral&utm_campaign=hp-footer&fg=1',
   'type': 'About / Company / Mission / Vision',
   'specific_type': 'Company overview',
   'importance_score': 95,
   'why_useful': "Essential for understanding Google's mission, values, and overall company narrative for branding and presentations."},
  {'url': 'https://www.google.com/intl/ar_eg/ads/?subid=ww-ww-et-g-awa-a-g_hpafoot1_1!o2

In [40]:
select_relevant_links("https://abdoasem.com")

{'links': [{'url': 'https://abdoasem.com/',
   'type': 'About / Company / Mission / Vision',
   'specific_type': 'Homepage / Overview',
   'importance_score': 95,
   'why_useful': "Essential for establishing initial brand identity, presenting a high-level overview of skills and value, and setting the tone for the individual's professional presence."},
  {'url': 'https://abdoasem.com/about',
   'type': 'About / Company / Mission / Vision',
   'specific_type': 'Personal Bio / Experience',
   'importance_score': 90,
   'why_useful': "Provides detailed insights into the individual's background, professional journey, values, and unique selling propositions, critical for trust-building and strategic positioning."},
  {'url': 'https://abdoasem.com/projects',
   'type': 'Products / Services / Solutions',
   'specific_type': 'Portfolio / Work Showcase',
   'importance_score': 98,
   'why_useful': 'Showcases practical application of skills, demonstrates problem-solving abilities through concrete

In [45]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)

    relevant_links = select_relevant_links(url)

    result = f"## Landing page contents\n\n{contents}\n\n## Relevant links\n"

    for link in relevant_links["links"]:
        result += f"* [{link['specific_type']}]({link['url']})\n"
        result += f"{fetch_website_contents(link['url'])}\n\n"

    return result


fetch_page_and_all_relevant_links("https://abdoasem.com")


'## Landing page contents\n\n{\'url\': \'https://abdoasem.com\', \'canonical\': \'https://abdoasem.com/\', \'title\': \'AbdolRahman Asem - Front-End Engineer | Modern Web Developer\', \'meta\': {\'viewport\': \'width=device-width, initial-scale=1.0\', \'description\': \'AbdolRahman Asem is a skilled front-end engineer specializing in modern web development, React, and creating stunning user experiences. View my portfolio and projects.\', \'author\': \'AbdolRahman Asem\', \'keywords\': \'front-end engineer, web developer, React, JavaScript, TypeScript, portfolio, modern web development\', \'og:type\': \'website\', \'og:url\': \'https://abdolrahman-asem.com/\', \'og:title\': \'AbdolRahman Asem - Front-End Engineer | Modern Web Developer\', \'og:description\': \'AbdolRahman Asem is a skilled front-end engineer specializing in modern web development, React, and creating stunning user experiences. View my portfolio and projects.\', \'twitter:card\': \'summary_large_image\', \'twitter:url\':

In [48]:
print(fetch_page_and_all_relevant_links("https://www.google.com"))

## Landing page contents

{'url': 'https://www.google.com', 'canonical': None, 'title': 'Google', 'meta': {'referrer': 'origin', None: '/images/branding/googleg/1x/googleg_standard_color_128dp.png'}, 'opengraph': {}, 'jsonld': [], 'text': 'Google Gmail صور تسجيل الدخول وضع AI عرض المزيد حذف حذف الإبلاغ عن عبارات بحث مقترحة غير ملائمة محرّك بحث Google متوفّر باللغة: English مصر لمحة الإعلانات الأعمال آلية عمل "بحث Google" العقد الثالث من نشاطنا في مجال المناخ: تحقق من عملنا الخصوصية البنود الإعدادات إعدادات البحث بحث متقدم بياناتك في "بحث Google" سجلّ البحث البحث في المساعدة إرسال ملاحظات المظهر الداكن: غير مفعّل تطبيقات Google', 'links': ['https://mail.google.com/mail/&ogbl', 'https://www.google.com/imghp?hl=ar&ogbl', 'https://www.google.com.eg/intl/ar/about/products', 'https://accounts.google.com/ServiceLogin?hl=ar&passive=true&continue=https://www.google.com/&ec=futura_exp_og_so_72776762_e', 'https://www.google.com/setprefs?sig=0_oXY-TiT0IxR43_R-n9ULEXxa95M%3D&hl=en&source=homepage&s

In [63]:
brochure_system_prompt = """
You are the ultimate assistant for analyzing company websites and creating high-impact brochures.
Your goal is to summarize the company's identity in a concise, professional, and persuasive manner
for prospective customers, investors, and recruits. Respond in Markdown without code blocks.

Make sure to include:

1. **Company Overview:** Who they are, what they do, and their market presence.
2. **Products & Services:** Core offerings and unique selling points.
3. **Company Culture:** Values, mission, work environment, and team dynamics.
4. **Customers & Partners:** Key clients, target audience, and strategic partners.
5. **Careers & Opportunities:** Available roles, career growth, and employee benefits.
6. **Contact & Online Presence:** Website, social media, and other relevant links.

Write it in a way that is engaging, professional, and ready to be distributed to stakeholders.
Use headings, bullet points, and short paragraphs to improve readability.
Always prioritize clarity, impact, and persuasiveness.

Sectionize the output into sections with headings and lines breaks.
also return as a markdown
"""


In [64]:
def get_brochure_user_prompt(company_name, url):
    base_prompt = f"""
You are creating a professional brochure for the company: {company_name}.
Use the following contents from its landing page and other relevant pages
to summarize the company for prospective customers, investors, and recruits.
Respond in Markdown without code blocks.

Focus on these sections if the information is available:
1. Company Overview: Who they are, what they do, and their market presence.
2. Products & Services: Core offerings and unique selling points.
3. Company Culture: Values, mission, work environment, and team dynamics.
4. Customers & Partners: Key clients, target audience, and strategic partners.
5. Careers & Opportunities: Available roles, career growth, and employee benefits.
6. Contact & Online Presence: Website, social media, and other relevant links.
"""

    website_contents = fetch_page_and_all_relevant_links(url)

    user_prompt = base_prompt + "\n\n" + website_contents

    user_prompt = user_prompt[:30_000]

    return user_prompt


In [65]:
print(get_brochure_user_prompt("Abdo Asem", "https://abdoasem.com"))


You are creating a professional brochure for the company: Abdo Asem.
Use the following contents from its landing page and other relevant pages
to summarize the company for prospective customers, investors, and recruits.
Respond in Markdown without code blocks.

Focus on these sections if the information is available:
1. Company Overview: Who they are, what they do, and their market presence.
2. Products & Services: Core offerings and unique selling points.
3. Company Culture: Values, mission, work environment, and team dynamics.
4. Customers & Partners: Key clients, target audience, and strategic partners.
5. Careers & Opportunities: Available roles, career growth, and employee benefits.
6. Contact & Online Presence: Website, social media, and other relevant links.


## Landing page contents

{'url': 'https://abdoasem.com', 'canonical': 'https://abdoasem.com/', 'title': 'AbdolRahman Asem - Front-End Engineer | Modern Web Developer', 'meta': {'viewport': 'width=device-width, initial-sc

In [77]:
from IPython.display import display, Markdown, update_display

def create_brochure(company_name, url, stream=False):
    user_prompt = get_brochure_user_prompt(company_name, url)
    
    if stream:
        response_text = ""
        display_handle = display(Markdown(""), display_id=True)
        
        stream_obj = gemini.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": brochure_system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            stream=True
        )
        
        for chunk in stream_obj:
            delta_content = chunk.choices[0].delta.content or ""
            response_text += delta_content
            update_display(Markdown(response_text), display_id=display_handle.display_id)
        
        return response_text
    else:
        response = gemini.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": brochure_system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )
        return response.choices[0].message.content


In [79]:
brochure = create_brochure("Abdo Asem", "https://abdoasem.com", stream=True)
brochure

# AbdolRahman Asem: Crafting Modern Web Experiences

## Professional Overview
AbdolRahman Asem is a highly skilled Front-End Engineer and Modern Web Developer dedicated to transforming digital visions into captivating realities. With a profound specialization in contemporary web development practices, AbdolRahman leverages cutting-edge technologies to build robust, scalable, and aesthetically superior user interfaces. His professional presence is defined by a commitment to delivering exceptional front-end solutions that elevate user engagement and drive business value.

---

## Products & Services
AbdolRahman Asem offers specialized front-end development services focused on creating dynamic and user-centric web applications. His core offerings are built upon a foundation of modern web standards and best practices.

*   **Modern Web Application Development:** Building responsive, high-performance, and visually appealing web interfaces from concept to deployment.
*   **Rich User Experience (UX) Design & Implementation:** Specializing in crafting intuitive and "stunning" user experiences that are both functional and delightful.
*   **Key Technology Stack:** Expertise in leading front-end technologies including **React**, **JavaScript**, and **TypeScript** for robust and maintainable codebases.
*   **Portfolio & Project Showcasing:** Demonstrating proven capabilities through a curated collection of impactful projects.

---

## Professional Philosophy & Approach
AbdolRahman's work is driven by a passion for innovation and a meticulous approach to development. His professional philosophy centers on:

*   **Excellence in Execution:** A commitment to delivering high-quality code and polished user interfaces.
*   **User-Centric Design:** Prioritizing the end-user experience, ensuring applications are intuitive, accessible, and engaging.
*   **Continuous Learning:** Staying abreast of the latest advancements in modern web technologies to provide forward-thinking solutions.
*   **Precision & Detail:** An eye for detail in both design implementation and code architecture, ensuring a seamless and refined product.

---

## Clients & Collaborations
AbdolRahman Asem targets clients and organizations seeking expert front-end development to enhance their digital presence. His ideal collaborations involve projects that demand:

*   **Innovative Web Solutions:** Businesses looking to build new, modern web applications or revamp existing ones.
*   **Superior User Experiences:** Companies prioritizing exceptional UI/UX to stand out in their market.
*   **Technology-Forward Development:** Clients who value the use of modern frameworks like React and robust languages like TypeScript.

---

## Careers & Opportunities
As an individual professional and a portfolio showcase, AbdolRahman Asem's platform is primarily focused on presenting his professional capabilities and projects. Therefore, this site does not typically feature traditional career openings or employee benefits in the corporate sense. However, AbdolRahman is open to new project opportunities and collaborations where his expertise can contribute to successful outcomes.

---

## Contact & Online Presence
Connect with AbdolRahman Asem to discuss your next web project or to explore potential collaborations.

*   **Website:** [https://abdoasem.com](https://abdoasem.com/)
*   **Portfolio:** [https://abdoasem.com/work](https://abdoasem.com/work)
*   **Services:** [https://abdoasem.com/services](https://abdoasem.com/services)
*   **Contact Form:** [https://abdoasem.com/contact](https://abdoasem.com/contact)

Please visit the website for direct contact information and to view his comprehensive portfolio.

'# AbdolRahman Asem: Crafting Modern Web Experiences\n\n## Professional Overview\nAbdolRahman Asem is a highly skilled Front-End Engineer and Modern Web Developer dedicated to transforming digital visions into captivating realities. With a profound specialization in contemporary web development practices, AbdolRahman leverages cutting-edge technologies to build robust, scalable, and aesthetically superior user interfaces. His professional presence is defined by a commitment to delivering exceptional front-end solutions that elevate user engagement and drive business value.\n\n---\n\n## Products & Services\nAbdolRahman Asem offers specialized front-end development services focused on creating dynamic and user-centric web applications. His core offerings are built upon a foundation of modern web standards and best practices.\n\n*   **Modern Web Application Development:** Building responsive, high-performance, and visually appealing web interfaces from concept to deployment.\n*   **Rich U

In [None]:
brochure = create_brochure("Abdo Asem", "https://abdoasem.com")
print(brochure)

# AbdolRahman Asem: Crafting Modern Web Experiences

## Professional Overview

AbdolRahman Asem is a highly skilled and dedicated Front-End Engineer specializing in modern web development. With expertise in building robust, responsive, and visually stunning user interfaces, AbdolRahman transforms complex requirements into intuitive and engaging digital experiences. His work is characterized by a commitment to cutting-edge technologies and best practices, ensuring high-performance and future-proof web solutions.

***

## Services & Core Expertise

AbdolRahman Asem offers specialized front-end engineering services, focusing on delivering exceptional web applications. His core offerings leverage a deep understanding of contemporary web technologies to provide clients with a competitive edge.

*   **Modern Web Development:** Building dynamic and responsive web applications from concept to deployment.
*   **React Development:** Expert in creating scalable and efficient user interfaces using

In [71]:
brochure = create_brochure("Hugging Face", "https://huggingface.co")
print(brochure)

# Hugging Face: The AI Community Building the Future

Hugging Face is at the forefront of advancing and democratizing artificial intelligence, serving as the essential collaboration platform for the global machine learning community.

***

## Company Overview

Hugging Face is a leading hub for artificial intelligence, dedicated to making cutting-edge machine learning accessible to everyone through open source and open science. We provide a collaborative platform where researchers, developers, and organizations come together to build, share, and deploy AI models, datasets, and applications. Our mission is to accelerate AI innovation and empower the next generation of AI builders.

***

## Products & Services

Hugging Face offers a comprehensive suite of tools and resources that form the backbone of modern ML development:

*   **Models:** Access over 1 million pre-trained models across all modalities (text, image, video, audio, 3D), enabling rapid experimentation and deployment of state-