In [4]:
import requests 
from bs4 import BeautifulSoup
import google.generativeai as genai
import os 
import json
from dotenv import load_dotenv  
load_dotenv()  


# Configure Google Generative AI.
# The key is read from the GEMINI_API_KEY environment variable (load_dotenv()
# is called above); os.environ.get returns None when the variable is unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Create model with system instruction (sets the AI's role).
# This module-level `model` is the one the helper functions below call.
model = genai.GenerativeModel(
    model_name='gemini-2.5-flash', 
    system_instruction="You are an expert web content summarizer and link analyzer",
    generation_config={
        'temperature': 0.3,  # Lower for more consistent responses
        'max_output_tokens': 2048,
    }
)

# Headers for web scraping: a browser-like User-Agent that Website.__init__
# passes to requests.get.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A scraped web page: its title, visible body text, and outgoing links.

    Attributes:
        url: The address that was requested.
        body: Raw response bytes returned by the server.
        title: Text of the <title> element, or "No title found".
        text: Visible text of <body>, one fragment per line ("" if no body).
        links: Non-empty ``href`` values of the page's <a> elements, in order.
    """

    def __init__(self, url, timeout=30):
        """
        Download ``url`` and extract title, text and links.

        Args:
            url: Page to fetch.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.exceptions.RequestException: if the request fails or
                times out.
        """
        self.url = url
        # Without an explicit timeout a stalled server blocks the caller forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None for an empty <title> or one with nested markup;
        # fall back to the placeholder so "None" never reaches a prompt.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only real hrefs: anchors without one (or with an empty one) are
        # skipped instead of being recorded as a placeholder string.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text formatted as a single prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

# Example usage: constructing Website performs the HTTP fetch immediately.
# `ed` is kept at module level and reused by the prompt examples in later cells.
ed = Website("https://www.digital-construction.ai")

# USER PROMPT - what you want the AI to do
# NOTE: the full page text is interpolated here without truncation, unlike the
# later prompts, which slice ed.text to a fixed length.
user_prompt = f"""Please summarize this website and extract key information:

Title: {ed.title}
Content: {ed.text}

Please provide:
1. A brief summary of the website
2. Main topics covered
3. Key services or products mentioned
"""

# Send the prompt to the shared model and print the generated summary.
response = model.generate_content(user_prompt)
print("=== WEBSITE SUMMARY ===")
print(response.text)

def _strip_code_fence(text):
    """Return the payload of the first Markdown code fence in ``text``, or ``text`` itself if unfenced."""
    for opener in ("```json", "```"):
        if opener in text:
            return text.split(opener)[1].split("```")[0].strip()
    return text


def get_links_analysis(url):
    """
    Analyze a website and extract meaningful links with descriptions.

    Args:
        url: Page to scrape and analyze.

    Returns:
        The parsed JSON object produced by the model on success.
        ``{"raw_response": ..., "error": "Could not parse as JSON"}`` when the
        model answered but the answer is not valid JSON.
        ``{"error": ...}`` for any other failure, including failure to load
        the page. The function does not raise for these cases.
    """
    # Loading the page is covered by the same error contract as the model call,
    # so callers only ever have to inspect the returned dict.
    try:
        website = Website(url)
    except Exception as e:
        return {"error": str(e)}

    # Create a specific prompt for link analysis. Only a sample of the text and
    # the first 20 links are sent; the true link count is passed separately so
    # the model can fill in "total_links_found".
    links_prompt = f"""
    Analyze this website and its links:
    
    Website Title: {website.title}
    Website Content: {website.text[:1000]}...
    
    Available Links: {website.links[:20]}
    Total Links On Page: {len(website.links)}
    
    Please provide a JSON response with the following structure:
    {{
        "website_summary": "Brief summary of the main website",
        "important_links": [
            {{
                "url": "link_url",
                "description": "what this link is about",
                "category": "internal/external/navigation/product/etc"
            }}
        ],
        "total_links_found": number_of_links
    }}
    
    Only include the most relevant and important links, maximum 10.
    """

    raw_text = None
    try:
        raw_text = model.generate_content(links_prompt).text
        # The model often wraps JSON in a Markdown fence; unwrap before parsing.
        return json.loads(_strip_code_fence(raw_text))
    except json.JSONDecodeError:
        # If JSON parsing fails, return the raw response
        return {"raw_response": raw_text, "error": "Could not parse as JSON"}
    except Exception as e:
        return {"error": str(e)}

# Test the link analysis.
# get_links_analysis returns a dict, which is pretty-printed here as JSON.
print("\n=== LINK ANALYSIS ===")
links_result = get_links_analysis("https://www.digital-construction.ai")
print(json.dumps(links_result, indent=2))

=== WEBSITE SUMMARY ===
Here's a summary of the website content:

---

### 1. Brief Summary of the Website

Digital Construction is a technology partner specializing in digital solutions for the construction and infrastructure sectors. Their primary goal is to help clients manage complex projects efficiently, within time and budget, by leveraging digital tools and expertise (both self-developed and best-in-market). They offer a comprehensive suite of services focused on data management, integrated project control, application development, and process automation, serving markets such as utility construction, infrastructure, and nuclear projects. They also provide their own proprietary information management platform called Held-r.

### 2. Main Topics Covered

*   **Digital Transformation in Construction & Infrastructure:** Providing digital tools and strategies for project management.
*   **Project Management & Control:** Ensuring projects stay on track, within budget, and are effective

In [None]:
# ========================================
# DIFFERENT WAYS TO STRUCTURE PROMPTS
# ========================================

# All examples below reuse `ed`, the Website instance created in the first cell.

# 1. Simple user prompt (what we used above)
# Defined for illustration only; it is not sent to the model in this cell.
simple_prompt = "Summarize this website content"

# 2. Multi-part prompt with clear structure
structured_prompt = f"""
TASK: Analyze website content
WEBSITE: {ed.title}
CONTENT: {ed.text[:500]}...

INSTRUCTIONS:
1. Provide a 2-sentence summary
2. List 3 key points
3. Rate the website's clarity (1-10)

FORMAT: Use bullet points for the key points.
"""

# 3. Chat-style conversation (multiple messages)
# A list of role/parts dicts alternating "user" and "model" turns.
# Defined for illustration only; it is not sent to the model in this cell.
conversation_prompt = [
    {"role": "user", "parts": ["I need help analyzing a website"]},
    {"role": "model", "parts": ["I'd be happy to help analyze a website. Please share the content you'd like me to review."]},
    {"role": "user", "parts": [f"Here's the website content: {ed.text[:500]}... Please summarize the main purpose and target audience."]}
]

# 4. Prompt with specific response format requirements
# The doubled braces are f-string escapes that produce literal { and }.
json_format_prompt = f"""
Analyze this website and respond in valid JSON format:

Website: {ed.title}
Content: {ed.text[:300]}...

Required JSON structure:
{{
    "summary": "brief summary here",
    "main_topics": ["topic1", "topic2", "topic3"],
    "target_audience": "description of target audience",
    "website_type": "business/blog/ecommerce/etc",
    "call_to_action": "main action the site wants users to take"
}}

Respond only with valid JSON, no additional text.
"""

# Test different prompt styles (only the structured and JSON prompts are run)
print("=== STRUCTURED PROMPT EXAMPLE ===")
response1 = model.generate_content(structured_prompt)
print(response1.text)

print("\n=== JSON FORMAT PROMPT EXAMPLE ===")
response2 = model.generate_content(json_format_prompt)
print(response2.text)

# ========================================
# RESPONSE FORMAT CONTROL TECHNIQUES
# ========================================

# Method 1: Control via generation config
# A second model instance with its own system instruction and sampling
# settings (lower temperature, explicit top_p, smaller output budget than
# the shared `model` defined in the first cell).
precise_model = genai.GenerativeModel(
    model_name='gemini-2.5-flash',
    system_instruction="You are a precise analyst who always follows format requirements exactly",
    generation_config={
        'temperature': 0.1,  # Very low for consistent formatting
        'top_p': 0.8,
        'max_output_tokens': 1024,
    }
)

# Method 2: Control via detailed prompting
# The page text is interpolated from `ed` (the Website scraped in the first
# cell) so the model has real content to analyse; previously the prompt
# contained only an unfilled "[website content here]" placeholder.
format_control_prompt = f"""
STRICT FORMAT REQUIREMENTS:
- Start each response with "ANALYSIS:"
- Use exactly 3 bullet points
- End with "CONFIDENCE: [percentage]"
- Keep each bullet point under 20 words
- Use no markdown formatting

Website to analyze: https://www.digital-construction.ai
Content: {ed.text[:500]}...

Follow the format requirements exactly.
"""

# Run the strict-format prompt against the low-temperature model.
print("\n=== PRECISE FORMAT CONTROL ===")
response3 = precise_model.generate_content(format_control_prompt)
print(response3.text)

In [None]:
# ========================================
# ADVANCED TECHNIQUES
# ========================================

def analyze_website_with_links(url, max_links_to_analyze=5):
    """
    Advanced website analysis that includes link context.

    Args:
        url: Page to scrape and analyze.
        max_links_to_analyze: Maximum number of usable links to list in the
            prompt.

    Returns:
        The model's analysis as text.

    Raises:
        Whatever ``Website(url)`` or ``model.generate_content`` raise; this
        function does not catch errors.
    """
    website = Website(url)

    # Filter first, then truncate: slicing before filtering would let
    # fragment/mailto/javascript links at the top of the page use up the
    # budget and leave fewer (possibly zero) usable links in the prompt.
    valid_links = [
        link for link in website.links
        if link.startswith(('http', '/'))
    ][:max_links_to_analyze]
    link_lines = "\n".join(f"- {link}" for link in valid_links)

    # Create comprehensive prompt
    comprehensive_prompt = f"""
    WEBSITE ANALYSIS TASK
    
    PRIMARY CONTENT:
    Title: {website.title}
    Main Content: {website.text[:800]}...
    
    NAVIGATION LINKS FOUND:
    {link_lines}
    
    ANALYSIS REQUIREMENTS:
    1. CONTENT SUMMARY: What is this website about? (2-3 sentences)
    2. BUSINESS MODEL: How does this business make money?
    3. TARGET USERS: Who is the intended audience?
    4. LINK ANALYSIS: Based on the navigation links, what are the main sections/services?
    5. NEXT STEPS: What would a visitor likely do next on this site?
    
    RESPONSE FORMAT:
    Use clear headers for each section (1., 2., 3., etc.)
    Keep each section concise but informative.
    """

    response = model.generate_content(comprehensive_prompt)
    return response.text

def create_website_comparison_prompt(urls_list):
    """
    Build a single prompt asking the model to compare several websites.

    Only the first three URLs are scraped. A URL that fails to load is
    reported with ``print`` and left out; the remaining sites keep the number
    of their position in the input.

    Args:
        urls_list: URLs to compare.

    Returns:
        The prompt text: a heading, one section per loaded site, and the
        comparison requirements.
    """
    collected = []
    for position, address in enumerate(urls_list[:3], start=1):
        try:
            page = Website(address)
            # Content is truncated so the combined prompt stays short.
            collected.append(
                (position, address, page.title, page.text[:400], len(page.links))
            )
        except Exception as e:
            print(f"Error analyzing {address}: {e}")

    pieces = ["WEBSITE COMPARISON ANALYSIS\n\n"]
    pieces.extend(
        f"""
WEBSITE {position}:
URL: {address}
Title: {title}
Content Preview: {preview}...
Links Found: {link_total}

"""
        for position, address, title, preview, link_total in collected
    )
    pieces.append("""
COMPARISON REQUIREMENTS:
1. Compare the main focus/purpose of each website
2. Identify target audiences for each
3. Compare the complexity (based on content and link count)
4. Suggest which website serves which type of user better
5. Rate each website's clarity (1-10 scale)

Format as a comparison table if possible.
""")

    return "".join(pieces)

# Example usage of advanced analysis.
# Uses the default max_links_to_analyze and prints the model's text answer.
print("=== ADVANCED WEBSITE ANALYSIS ===")
advanced_result = analyze_website_with_links("https://www.digital-construction.ai")
print(advanced_result)

# ========================================
# HANDLING DIFFERENT CONTENT TYPES
# ========================================

def create_multimodal_prompt_template():
    """
    Return example prompt skeletons keyed by the kind of content they handle.

    The values are static strings with bracketed placeholders; nothing is
    sent to the model here.

    Returns:
        A dict with the keys "text_only", "text_with_links",
        "structured_data" and "comparison_task", in that order.
    """
    def as_block(*lines):
        # Multi-line templates open with a newline, indent every line by
        # eight spaces, and close with a newline plus the same indent.
        separator = "\n" + " " * 8
        return separator.join(("", *lines, ""))

    return {
        "text_only": "Analyze this text content: [content here]",
        "text_with_links": as_block(
            "Analyze this website:",
            "Content: [text content]",
            "Navigation: [links list]",
            "Task: [specific analysis needed]",
        ),
        "structured_data": as_block(
            "Process this structured information:",
            "Data: [formatted data]",
            "Format requirement: [JSON/CSV/markdown/etc]",
            "Analysis needed: [specific requirements]",
        ),
        "comparison_task": as_block(
            "Compare these items:",
            "Item 1: [details]",
            "Item 2: [details]",
            "Comparison criteria: [what to compare]",
            "Output format: [table/list/paragraphs]",
        ),
    }

# Show the templates: print each template under its upper-cased key.
templates = create_multimodal_prompt_template()
print("\n=== PROMPT TEMPLATES ===")
for template_type, template in templates.items():
    print(f"\n{template_type.upper()}:")
    print(template)

# ========================================
# ERROR HANDLING AND FALLBACKS
# ========================================

def robust_content_generation(prompt, max_retries=3):
    """
    Ask the shared model for content, retrying on errors or unusable answers.

    An answer counts as usable when its stripped text is longer than ten
    characters. Each failed attempt is reported with ``print``.

    Args:
        prompt: Prompt passed to ``model.generate_content``.
        max_retries: Maximum number of attempts.

    Returns:
        ``{"success": True, "content": ..., "attempt": n}`` on the first
        usable answer.
        ``{"success": False, "error": ..., "attempts": max_retries}`` when the
        final attempt raised.
        ``{"success": False, "error": "All attempts failed"}`` otherwise.
    """
    for attempt_no in range(1, max_retries + 1):
        try:
            generated = model.generate_content(prompt).text
        except Exception as exc:
            print(f"Attempt {attempt_no} failed: {exc}")
            # Only the last attempt's exception is surfaced to the caller.
            if attempt_no == max_retries:
                return {
                    "success": False,
                    "error": str(exc),
                    "attempts": max_retries
                }
            continue

        # Check if response is valid
        if generated and len(generated.strip()) > 10:
            return {
                "success": True,
                "content": generated,
                "attempt": attempt_no
            }
        print(f"Attempt {attempt_no}: Response too short or empty")

    return {"success": False, "error": "All attempts failed"}

# Test robust generation.
# The result dict carries "attempt" and "content" on success and "error" on
# failure, so each branch reads only the keys that are present for its case.
print("\n=== ROBUST CONTENT GENERATION TEST ===")
test_result = robust_content_generation("Summarize the benefits of AI in web development")
if test_result["success"]:
    print(f"✅ Success on attempt {test_result['attempt']}")
    print(test_result["content"][:200] + "...")
else:
    print(f"❌ Failed: {test_result['error']}")

# Summary: Key Concepts for Google Generative AI Prompting

## 1. **System vs User Prompts**

- **System Instruction**: Set when creating the model - defines the AI's role/personality
- **User Prompt**: The actual request/question you send to the model

## 2. **Ways to Pass Content**

- **Simple text**: `model.generate_content("Your prompt")`
- **Structured data**: Include formatted content in your prompt
- **Multiple parts**: Use lists for complex prompts
- **Links/References**: Include them as part of your text prompt

## 3. **Response Format Control**

- **Generation Config**: Control temperature, max tokens, etc.
- **Prompt Engineering**: Ask for specific formats (JSON, tables, bullet points)
- **System Instructions**: Set overall behavior and style
- **Format Templates**: Use consistent prompt structures

## 4. **Best Practices**

- Use clear, specific instructions
- Provide examples of desired output format
- Handle errors with try/except blocks
- Test different temperature settings for your use case
- Include context and constraints in your prompts


In [5]:
# ========================================
# PRACTICAL EXAMPLE: COMPLETE WORKFLOW
# ========================================

def complete_website_analysis_workflow(url):
    """
    Run the full pipeline for one URL and return the model's analysis.

    The function builds a dedicated analyst model, scrapes the page,
    assembles a sectioned prompt from the page data, and asks the model to
    fill it in.

    Args:
        url: Page to analyze.

    Returns:
        The analysis text. If the page cannot be loaded or generation fails,
        a string starting with "Error loading website:" or
        "Error generating analysis:" is returned instead of raising.
    """
    # Step 1: a model configured specifically for this task.
    persona = (
        "You are a professional web analyst. \n"
        "        You provide clear, structured analysis of websites including content, \n"
        "        user experience, and business insights. Always be objective and constructive."
    )
    sampling = {
        'temperature': 0.4,  # Balanced creativity and consistency
        'max_output_tokens': 2048,
        'top_p': 0.9
    }
    analyst = genai.GenerativeModel(
        model_name='gemini-2.5-flash',
        system_instruction=persona,
        generation_config=sampling
    )

    # Step 2: scrape the page; report a load failure as text.
    try:
        site = Website(url)
    except Exception as e:
        return f"Error loading website: {e}"

    # Step 3: assemble the prompt. Only the first eight links are listed.
    nav_bullets = "\n".join(f"• {link}" for link in site.links[:8])
    analysis_prompt = f"""
    WEBSITE ANALYSIS REQUEST
    
    URL: {url}
    Title: {site.title}
    Content Length: {len(site.text)} characters
    Links Found: {len(site.links)}
    
    CONTENT SAMPLE:
    {site.text[:600]}...
    
    TOP NAVIGATION LINKS:
    {nav_bullets}
    
    ANALYSIS FRAMEWORK:
    Please analyze this website using the following structure:
    
    ## 🎯 PRIMARY PURPOSE
    What is the main goal of this website?
    
    ## 👥 TARGET AUDIENCE  
    Who is this website designed for?
    
    ## 🏢 BUSINESS MODEL
    How does this organization/business operate?
    
    ## 🧭 USER JOURNEY
    What path would typical visitors take?
    
    ## ⭐ STRENGTHS
    What does this website do well?
    
    ## 🔧 IMPROVEMENT OPPORTUNITIES
    What could be enhanced?
    
    ## 📊 OVERALL RATING
    Rate the website (1-10) for clarity, usability, and effectiveness.
    
    RESPONSE STYLE: Use emojis for headers, be concise but thorough, provide actionable insights.
    """

    # Step 4: generate the analysis; report a generation failure as text.
    try:
        return analyst.generate_content(analysis_prompt).text
    except Exception as e:
        return f"Error generating analysis: {e}"

# Run the complete workflow.
# The workflow returns either the analysis or an "Error ..." string, so the
# closing banner below is printed in both cases.
print("🚀 RUNNING COMPLETE WEBSITE ANALYSIS WORKFLOW")
print("=" * 60)

result = complete_website_analysis_workflow("https://www.digital-construction.ai")
print(result)

print("\n" + "=" * 60)
print("✅ WORKFLOW COMPLETE!")

# Bonus: Quick comparison function
def quick_compare_websites(url1, url2):
    """
    Quick comparison between two websites.

    Args:
        url1: First page to compare.
        url2: Second page to compare.

    Returns:
        The model's three-point comparison as text.

    Raises:
        Whatever ``Website(...)`` or ``model.generate_content`` raise; this
        function does not catch errors.
    """
    # Fetch each page exactly once. Building Website(url) separately for the
    # title and for the text doubled the HTTP requests and could pair a title
    # and a body from two different responses.
    first = Website(url1)
    second = Website(url2)

    comparison_prompt = f"""
    Compare these two websites briefly:
    
    Website 1: {first.title} - {first.text[:200]}...
    Website 2: {second.title} - {second.text[:200]}...
    
    Provide a 3-point comparison:
    1. Main difference in purpose
    2. Target audience difference  
    3. Which is more user-friendly and why
    
    Keep it concise - 2-3 sentences per point.
    """

    response = model.generate_content(comparison_prompt)
    return response.text

# Example of website comparison (uncomment to test)
# print("\n🔄 WEBSITE COMPARISON EXAMPLE:")
# comparison = quick_compare_websites("https://www.google.com", "https://www.digital-construction.ai")
# print(comparison)

🚀 RUNNING COMPLETE WEBSITE ANALYSIS WORKFLOW
Here's an analysis of the Digital Construction website based on the provided information:

## 🎯 PRIMARY PURPOSE
The primary purpose of this website is to establish Digital Construction as a leading technology partner for the "Bouw en Infra" (Construction and Infrastructure) sector. It aims to showcase their specialized services (data analysis, project control, information management, GIS, application integration) and generate leads from companies seeking digital transformation and efficiency improvements in their projects.

## 👥 TARGET AUDIENCE
This website is designed for companies and organizations within the construction and infrastructure industries, primarily in the Netherlands. The target audience includes decision-makers, project managers, IT managers, data managers, and operational leaders who are looking to leverage advanced technology for improved project efficiency, better data management, and strategic insights.

## 🏢 BUSINESS MO

In [2]:
# Scratch value; no other visible cell reads `a`.
a = 10

In [3]:
# Scratch value; no other visible cell reads `b`.
b = 10