In [1]:
#| default_exp website

In [2]:
# | export
import re
from dataclasses import dataclass, asdict
from dataclasses import field
from datetime import datetime
from typing import Optional, List
from typing import Optional, Dict, Any
from urllib.parse import urlparse

from bson.objectid import ObjectId
from pymongo.database import Database
from pymongo.results import InsertOneResult, UpdateResult

In [3]:
#| export
@dataclass
class Website:
    """A website tracked by the SEO analysis system.

    Attributes:
        url: Absolute URL; must parse with both a scheme and a network location.
        name: Display name; at least 2 characters once surrounding whitespace
            is stripped.
        description: Optional free-text description.
        language: Language code of the form ``xx`` or ``xx-YY`` (e.g. ``en``,
            ``en-US``).
        created_at: Timestamp read when the instance is created, unless one is
            passed explicitly.

    Raises:
        ValueError: From ``__post_init__`` when ``url``, ``name`` or
            ``language`` fails validation.
    """

    url: str
    name: str
    description: Optional[str] = None
    language: str = "en"
    # default_factory reads the clock per instance. A plain ``datetime.now()``
    # default would be evaluated once, when the class body runs, and then be
    # shared by every instance created afterwards.
    created_at: datetime = field(default_factory=datetime.now)

    def __post_init__(self):
        """Run all field validators right after the dataclass ``__init__``."""
        self.validate_url()
        self.validate_name()
        self.validate_language()

    def validate_url(self) -> None:
        """Require that ``url`` parses with both a scheme and a netloc.

        Raises:
            ValueError: If either component is missing.
        """
        parsed = urlparse(self.url)
        if not (parsed.scheme and parsed.netloc):
            raise ValueError(f"Invalid URL format: {self.url}")

    def validate_name(self) -> None:
        """Require a name of at least 2 characters after stripping whitespace.

        Raises:
            ValueError: If the name is empty or too short.
        """
        if not self.name or len(self.name.strip()) < 2:
            raise ValueError("Name must be at least 2 characters long")

    def validate_language(self) -> None:
        """Require a language code shaped like ``en`` or ``en-US``.

        Raises:
            ValueError: If the code does not have that shape.
        """
        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, which would let a value like "en\n" through.
        if not re.fullmatch(r"[a-z]{2}(-[A-Z]{2})?", self.language):
            raise ValueError(f"Invalid language code: {self.language}")

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (via ``asdict``) for MongoDB storage."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Website":
        """Build a Website from a dictionary of field values.

        A ``"_id"`` key is ignored so that a document read back from MongoDB
        can be passed in directly. ``data`` itself is not modified.

        Raises:
            ValueError: If the resulting field values fail validation.
            TypeError: If ``data`` contains any other key that is not a field.
        """
        field_values = {key: value for key, value in data.items() if key != "_id"}
        return cls(**field_values)

In [4]:
# | export
class WebsiteStore:
    """Handle MongoDB operations for Website objects.

    All operations go through the ``websites`` collection of the database
    handed to the constructor.
    """

    def __init__(self, db: Database):
        """Keep a reference to ``db`` and to its ``websites`` collection."""
        self.db = db
        self.collection = self.db.websites

    def insert_or_update_website(self, website: Website) -> str:
        """Upsert ``website`` keyed on its URL and return the document id.

        If no document with the same ``url`` exists one is inserted; otherwise
        the existing document's fields are overwritten, except ``created_at``,
        which is only written when the document is first inserted.

        Args:
            website: The website whose ``to_dict()`` fields are stored.

        Returns:
            The ``_id`` of the inserted or existing document, as a string.

        Raises:
            RuntimeError: If the document matched by the update can no longer
                be found when its ``_id`` is looked up afterwards.
        """
        fields_to_set = website.to_dict()

        # created_at goes into $setOnInsert rather than $set so that updating
        # an existing URL does not overwrite its original creation time.
        # MongoDB rejects an update naming the same field under both
        # operators, hence the pop.
        created_at = fields_to_set.pop("created_at", None)
        update = {"$set": fields_to_set}
        if created_at is not None:
            update["$setOnInsert"] = {"created_at": created_at}

        result = self.collection.update_one(
            {"url": website.url},  # Find by URL
            update,
            upsert=True,  # Create if doesn't exist
        )

        if result.upserted_id is not None:
            return str(result.upserted_id)

        # The update matched an existing document, so its _id was not reported
        # back; look it up by URL.
        existing = self.collection.find_one({"url": website.url})
        if existing is None:
            # Only reachable if the document vanished between the two calls.
            raise RuntimeError(
                f"Website with url {website.url!r} disappeared after update"
            )
        return str(existing["_id"])

    def delete_website(self, website_id: str) -> bool:
        """Delete the website whose ``_id`` is ``website_id``.

        Args:
            website_id: String form of the document's ObjectId.

        Returns:
            True if a document was deleted. False if nothing matched, or if
            any exception was raised while converting the id or talking to the
            collection (the error is printed, not re-raised).
        """
        try:
            result = self.collection.delete_one({"_id": ObjectId(website_id)})
            return result.deleted_count > 0
        except Exception as e:
            print(f"Error deleting website: {e}")
            return False

In [8]:
#|test
# Happy path: a fully specified website is accepted and keeps its values
print("Testing valid website creation...")
valid_website = Website(
    url="https://example.com",
    name="Example Site",
    description="A test website",
    language="en",
)
print(f"Created website: {valid_website.to_dict()}")
assert valid_website.url == "https://example.com"
assert valid_website.name == "Example Site"

# A string with no scheme/host must be rejected by the URL validator
print("\nTesting URL validation...")
try:
    invalid_website = Website(url="not_a_url", name="Test")
except ValueError as url_error:
    print(f"Caught expected ValueError: {url_error}")
else:
    assert False, "Should raise ValueError for invalid URL"

# An empty name must be rejected by the name validator
print("\nTesting name validation...")
try:
    invalid_name = Website(url="https://example.com", name="")
except ValueError as name_error:
    print(f"Caught expected ValueError: {name_error}")
else:
    assert False, "Should raise ValueError for invalid name"

print("\nAll tests passed successfully!")


Testing valid website creation...
Created website: {'url': 'https://example.com', 'name': 'Example Site', 'description': 'A test website', 'language': 'en', 'created_at': datetime.datetime(2024, 12, 9, 7, 2, 4, 992637)}

Testing URL validation...
Caught expected ValueError: Invalid URL format: not_a_url

Testing name validation...
Caught expected ValueError: Name must be at least 2 characters long

All tests passed successfully!


In [6]:
#|test
from unittest.mock import Mock, MagicMock
from bson import ObjectId

# Stand-in database: WebsiteStore reads its collection from ``db.websites``
mock_collection = MagicMock()
mock_db = MagicMock()
mock_db.websites = mock_collection

# Store under test, plus a website and id shared by the cases below
store = WebsiteStore(mock_db)
test_website = Website(
    url="https://test.com",
    name="Test Site"
)
website_id = "507f1f77bcf86cd799439011"

# insert_or_update_website, new website: the upserted id is returned directly
mock_collection.update_one.return_value = MagicMock(
    upserted_id=ObjectId(website_id)
)
inserted_id = store.insert_or_update_website(test_website)
assert inserted_id == website_id

# The upsert must filter on the URL, carry the website fields and ask for upsert
update_args, update_kwargs = mock_collection.update_one.call_args
assert update_args[0] == {"url": "https://test.com"}
assert update_args[1]["$set"]["name"] == "Test Site"
assert update_kwargs.get("upsert") is True
# No follow-up lookup is needed when the id came back from the upsert
mock_collection.find_one.assert_not_called()

# insert_or_update_website, existing website: the id comes from a lookup by URL
mock_collection.update_one.return_value = MagicMock(upserted_id=None)
mock_collection.find_one.return_value = {"_id": ObjectId(website_id)}
updated_id = store.insert_or_update_website(test_website)
assert updated_id == website_id
mock_collection.find_one.assert_called_once()
find_args, _ = mock_collection.find_one.call_args
assert find_args[0] == {"url": "https://test.com"}

# delete_website, successful: one document deleted, matched by ObjectId
mock_collection.delete_one.return_value = MagicMock(deleted_count=1)
assert store.delete_website(website_id) is True
mock_collection.delete_one.assert_called_with({"_id": ObjectId(website_id)})

# delete_website, not found: nothing deleted
mock_collection.delete_one.return_value = MagicMock(deleted_count=0)
assert store.delete_website(website_id) is False

# delete_website, malformed id: the conversion error is printed and swallowed,
# and the collection is never asked to delete anything
deletes_so_far = mock_collection.delete_one.call_count
assert store.delete_website("not-an-object-id") is False
assert mock_collection.delete_one.call_count == deletes_so_far
