# Notebook cell In [39]
from typing import Dict
from urllib.parse import urlsplit

"""
* This function counts how many unique normalized valid URLs were passed to the function
*
* Accepts a list of URLs
*
* Example:
*
* input: ['https://example.com']
* output: 1
*
* Notes:
*  - assume none of the URLs have authentication information (username, password).
*
* Normalized URL:
*  - process in which a URL is modified and standardized: https://en.wikipedia.org/wiki/URL_normalization
*
#    For example.
#    These 2 urls are the same:
#    input: ["https://example.com", "https://example.com/"]
#    output: 1
#
#    These 2 are not the same:
#    input: ["https://example.com", "http://example.com"]
#    output: 2
#
#    These 2 are the same:
#    input: ["https://example.com?", "https://example.com"]
#    output: 1
#
#    These 2 are the same:
#    input: ["https://example.com?a=1&b=2", "https://example.com?b=2&a=1"]
#    output: 1
"""


def count_unique_urls(urls: list[str]) -> int:
    """Count the distinct valid URLs in *urls* after normalization.

    Two URLs are treated as the same when they agree on all of:

    * scheme and host (compared case-insensitively),
    * port, where the scheme's default port (80 for http, 443 for https)
      is equivalent to no port at all,
    * path, where an empty path is equivalent to ``/``,
    * query parameters, regardless of their order; an empty query (a bare
      trailing ``?``) is equivalent to no query.

    The fragment (``#...``) is ignored. Strings without both a scheme and a
    host, or with a malformed authority, are not valid URLs and are skipped.

    Args:
        urls: URLs to inspect; may be empty.

    Returns:
        The number of unique normalized URLs.
    """
    default_ports = {"http": 80, "https": 443}
    unique_keys = set()
    for url in urls:
        try:
            parts = urlsplit(url.strip())
            port = parts.port  # raises ValueError on a non-numeric/out-of-range port
        except ValueError:
            continue
        # urlsplit already lower-cases the scheme and the hostname.
        scheme = parts.scheme
        host = parts.hostname
        if not scheme or not host:
            continue
        if port == default_ports.get(scheme):
            port = None
        # Sorting the raw "k=v" pairs makes parameter order irrelevant;
        # dropping empty pieces folds "?" and "a=1&" into their bare forms.
        query = tuple(sorted(pair for pair in parts.query.split("&") if pair))
        unique_keys.add((scheme, host, port, parts.path or "/", query))
    return len(unique_keys)

"""
 * This function counts how many unique normalized valid URLs were passed to the function per top level domain
 *
 * A top level domain is a domain in the form of example.com. Assume all top level domains end in .com
 * subdomain.example.com is not a top level domain.
 *
 * Accepts a list of URLs
 *
 * Example:
 *
 * input: ["https://example.com"]
 * output: {"example.com": 1}
 *
 * input: ["https://example.com", "https://subdomain.example.com"]
 * output: {"example.com": 2}
 *
"""
def get_top_level_domain(url: str) -> str:
    """Return the ``name.com``-style domain of *url*, or ``""`` if it has none.

    The host comes from a real URL parse rather than from splitting on ``/``,
    so a port, query string or fragment that directly follows the host
    (``https://example.com?a=1``) never leaks into the result, a path segment
    is never mistaken for the host, and the result is lower-cased.

    Args:
        url: URL such as ``https://subdomain.example.com/path``.

    Returns:
        The last two labels of the host joined by a dot (``example.com``), or
        an empty string when the URL has no host, the authority is malformed,
        or the host has fewer than two non-empty labels.
    """
    try:
        host = urlsplit(url.strip()).hostname or ""
    except ValueError:
        # Malformed authority, e.g. unbalanced IPv6 brackets.
        return ""
    # A fully-qualified host may carry a trailing root dot ("example.com.").
    labels = host.rstrip(".").split(".")
    if len(labels) < 2 or not all(labels[-2:]):
        return ""
    return ".".join(labels[-2:])

def count_unique_urls_per_top_level_domain(urls: list[str]) -> Dict[str, int]:
    """Count the distinct normalized URLs in *urls*, grouped by domain.

    URLs are grouped by the value ``get_top_level_domain`` returns for them,
    so subdomains are counted under their parent domain. Within a group a URL
    is counted once per normalized form: scheme and host are compared
    case-insensitively, the scheme's default port (80 for http, 443 for
    https) equals no port, an empty path equals ``/``, query parameters are
    order-insensitive, and an empty query equals no query. The fragment is
    ignored.

    URLs for which no domain can be determined, or which lack a scheme or
    host, are skipped.

    Args:
        urls: URLs to inspect; may be empty.

    Returns:
        A mapping of domain to its number of unique normalized URLs, in
        order of each domain's first appearance.
    """
    default_ports = {"http": 80, "https": 443}
    unique_by_domain: Dict[str, set] = {}
    for url in urls:
        top_level_domain = get_top_level_domain(url)
        if not top_level_domain:
            continue
        try:
            parts = urlsplit(url.strip())
            port = parts.port  # raises ValueError on a non-numeric/out-of-range port
        except ValueError:
            continue
        # urlsplit already lower-cases the scheme and the hostname.
        scheme = parts.scheme
        host = parts.hostname
        if not scheme or not host:
            continue
        if port == default_ports.get(scheme):
            port = None
        # Sorting the raw "k=v" pairs makes parameter order irrelevant;
        # dropping empty pieces folds "?" and "a=1&" into their bare forms.
        query = tuple(sorted(pair for pair in parts.query.split("&") if pair))
        key = (scheme, host, port, parts.path or "/", query)
        unique_by_domain.setdefault(top_level_domain, set()).add(key)
    return {domain: len(keys) for domain, keys in unique_by_domain.items()}