In [4]:
url = 'https://wcrp-cmip.github.io/CMIP7-CVs/_context_'
ourl = 'https://bob.github.io/CMIP7-CVs/_context_'

In [6]:
import requests
from requests.adapters import HTTPAdapter
from urllib.parse import urlparse

url_map = {
    "bob.github.io": "wcrp-cmip.github.io",
    "example.com": "localhost:8000"
}

# --- Step 1: Define an adapter that rewrites URLs
class URLRewritingAdapter(HTTPAdapter):
    def __init__(self, url_map, **kwargs):
        self.url_map = url_map
        super().__init__(**kwargs)

    def send(self, request, **kwargs):
        parsed = urlparse(request.url)
        original_host = parsed.hostname

        if original_host in self.url_map:
            new_host = self.url_map[original_host]
            new_url = request.url.replace(original_host, new_host)
            request.url = new_url

            # 🛠️ Preserve original host for Host header (virtual hosting)
            request.headers['Host'] = new_host

        return super().send(request, **kwargs)

# --- Step 2: Monkey-patch the default global session
adapter = URLRewritingAdapter(url_map)
session = requests.Session()
session.mount("http://", adapter)
session.mount("https://", adapter)

# Replace the default session used by `requests.get/post/etc.`
requests.sessions.Session = lambda: session

# --- Step 3: Test it
response = requests.get(ourl, verify=False)

print("Final URL:", response.url)
print("Status:", response.status_code)
print(response.text[:300])


Final URL: https://wcrp-cmip.github.io/CMIP7-CVs/_context_
Status: 200
{
    "@context": {
        "@base": "https://wcrp-cmip.github.io/CMIP7-CVs/",
        "@vocab": "https://wcrp-cmip.github.io/CMIP7-CVs/",
        "entries": "@none",
        "id": "@id",
        "type": "@type",
        "cf": "https://wcrp-cmip.github.io/CF/",
        "cmip6plus": "https://wcrp-cmi




In [23]:
import requests
from requests.adapters import HTTPAdapter
from urllib.parse import urlparse
from rich.console import Console,Group
from rich.table import Table
from rich.panel import Panel

default_session = requests.Session

class RequestRedirector:
    def __init__(self, url_map={}):
        

        self.default_url_map = url_map.copy()
        self.url_map = url_map

        # Save the original session to restore later
        self.default_session = default_session

        if url_map is not None:
            # Monkey-patch the requests session
            self._patch_requests()
            # otherwise it will use the default session


    def _patch_requests(self):
        """Monkey-patches the `requests` session."""
        adapter = URLRewritingAdapter(self.url_map)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        # Replaces the default global session used by `requests.get()`, `requests.post()`, etc.
        requests.sessions.Session = lambda: session

    def restore_defaults(self):
        """Restores the default session and URL redirects."""
        self.url_map = self.default_url_map.copy()

        # Restore the original session
        requests.sessions.Session = self.default_session
        print("Restored default session and redirects.")

    def add_redirect(self, old_url, new_url):
        """Adds a new URL redirect and re-applies the patch."""
        self.url_map[old_url] = new_url
        self._patch_requests()  # Re-apply the patch with the updated redirects
        print(f"Redirect added: {old_url} -> {new_url}")

    def list_redirects(self):
        """Displays the list of current URL redirects using the `rich` library."""
        table = Table(title="Current URL Redirects")
        table.add_column("Original URL", justify="center", style="cyan")
        table.add_column("Redirected To", justify="center", style="magenta")

        for original, new in self.url_map.items():
            # Wrap each URL in a Panel (a box) to display it as a box
            original_box = Panel(original, expand=True, width=30)
            new_box = Panel(new, expand=True, width=30)
            
                    # if members:
            panels = [Panel(member, expand=True, width=30) for member in [new, original,'bob']]
            new_box = Group(*panels)
            
            
            # # Add the wrapped URLs (boxes) as rows in the table
            # print(f"Original URL: {original}")
            # print(f"Redirected To: {new}")
            # table.add_row(original, new)
            table.add_row(original_box, new_box)

        console = Console()
        console.print(table)

    def test_redirect(self, url):
        """Test a redirect by making a request and displaying the final URL."""
        response = requests.get(url, verify=False)
        print(f"Original URL: {url}")
        print(f"Final URL: {response.url}")
        print(f"Status Code: {response.status_code}")
        print(response.text[:300])  # Display first 300 characters of the response text


class URLRewritingAdapter(HTTPAdapter):
    def __init__(self, url_map, **kwargs):
        self.url_map = url_map
        super().__init__(**kwargs)

    def send(self, request, **kwargs):
        parsed = urlparse(request.url)
        original_host = parsed.hostname

        if original_host in self.url_map:
            new_host = self.url_map[original_host]
            # we can add additional replications here 
            new_url = request.url.replace(original_host, new_host)
            request.url = new_url

            # 🛠️ Preserve original host for Host header (virtual hosting)
            request.headers['Host'] = new_host

        return super().send(request, **kwargs)


# Example Usage:

# Initialize the RequestRedirector with the default redirects
request_redirector = RequestRedirector()

# Add a new redirect
request_redirector.add_redirect("bob.github.io", "wcrp-cmip.github.io")

# List current redirects in a rich table with boxes
request_redirector.list_redirects()

# Test a redirect
request_redirector.test_redirect("http://bob.github.io/CMIP7_CVs/_context_")

# Restore defaults (session and redirects)
request_redirector.restore_defaults()

# List redirects again after restoring defaults
request_redirector.list_redirects()


Redirect added: bob.github.io -> wcrp-cmip.github.io


Original URL: http://bob.github.io/CMIP7_CVs/_context_
Final URL: http://wcrp-cmip.github.io/CMIP7_CVs/_context_
Status Code: 404
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-type" content="text/html; charset=utf-8">
    <meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src 'unsafe-inline'; img-src data:; connect-src 'self'">
    <title>Site not found &middot; GitHub Pages</title>
 
Restored default session and redirects.


In [None]:
import requests
from requests.adapters import HTTPAdapter
from urllib.parse import urlparse
import re

from rich.console import Console, Group
from rich.table import Table
from rich.panel import Panel


class RequestRedirector:
    def __init__(self, redirect_rules=None):
        self.redirect_rules = redirect_rules or {}
        self.default_rules = self.redirect_rules.copy()

        # Save original session
        self.default_session = requests.Session
        self._patch_requests()

    def _patch_requests(self):
        adapter = URLRewritingAdapter(self.redirect_rules)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        requests.sessions.Session = lambda: session

    def restore_defaults(self):
        self.redirect_rules = self.default_rules.copy()
        requests.sessions.Session = self.default_session
        print("Restored default session and redirect rules.")

    def add_redirect(self, host, regex_in, regex_out):
        self.redirect_rules.setdefault(host, []).append({
            "regex_in": regex_in,
            "regex_out": regex_out
        })
        self._patch_requests()
        print(f"Added redirect: {host} | {regex_in} -> {regex_out}")

    # def list_redirects(self):
    #         """Displays the list of current URL redirects using the `rich` library."""
    #         table = Table(title="Current URL Redirects")
    #         table.add_column("Original URL", justify="center", style="cyan")
    #         table.add_column("Redirected To", justify="center", style="magenta")

    #         for original, new in self.url_map.items():
    #             # Wrap each URL in a Panel (a box) to display it as a box
    #             original_box = Panel(original, expand=True, width=30)
    #             new_box = Panel(new, expand=True, width=30)
                
    #                     # if members:
    #             panels = [Panel(member, expand=True, width=30) for member in [new, original,'bob']]
    #             new_box = Group(*panels)
                
                
    #             # # Add the wrapped URLs (boxes) as rows in the table
    #             # print(f"Original URL: {original}")
    #             # print(f"Redirected To: {new}")
    #             # table.add_row(original, new)
    #             table.add_row(original_box, new_box)


    def list_redirects(self):
        console = Console()
        table = Table(title="Regex-Based URL Redirects (by Host)")

        table.add_column("Host", justify="center", style="cyan", no_wrap=True)
        table.add_column("Rules", justify="left", style="magenta")

        for host, rules in self.redirect_rules.items():
            
            host = Panel(host, expand=True, width=30)
            
            rule_panels = [
                Panel(f"[bold]Match:[/bold] {rule['regex_in']}\n[bold]Replace:[/bold] {rule['regex_out']}", expand=True)
                for rule in rules
            ]
            rules_group = Group(*rule_panels)
            table.add_row(host, rules_group)

        console.print(table)

    def test_redirect(self, url):
        response = requests.get(url, verify=False)
        print(f"Original URL: {url}")
        print(f"Final URL: {response.url}")
        print(f"Status Code: {response.status_code}")
        print(response.text[:300])


class URLRewritingAdapter(HTTPAdapter):
    def __init__(self, redirect_rules, **kwargs):
        self.redirect_rules = redirect_rules
        super().__init__(**kwargs)

    def send(self, request, **kwargs):
        
        print('---',request)
        
        parsed = urlparse(request.url)
        host = parsed.hostname
        path = parsed.path

        rules = self.redirect_rules.get(host, [])

        for rule in rules:
            new_path = re.sub(rule['regex_in'], rule['regex_out'], path)
            if new_path != path:
                # Apply replacement
                new_url = request.url.replace(path, new_path)
                request.url = new_url
                request.headers['Host'] = host  # Virtual host
                break  # Stop at the first match

        return super().send(request, **kwargs)


# Example Usage:

redirect_rules = {
    "bob.github.io": [
        {"regex_in": r"/CMIP7_CVs/.*", "regex_out": r"/cmip7/data/"},
    ],
    "example.com": [
        {"regex_in": r"/old/(.*)", "regex_out": r"/new/\1"},
        {"regex_in": r"/legacy", "regex_out": r"/modern"},
    ]
}

# Initialize redirector
rr = RequestRedirector(redirect_rules)

# Add new rule
rr.add_redirect("cmip:", r"cmip:.*", r"cmip7.com/\1")

# List current redirects
rr.list_redirects()

# Test it
rr.test_redirect("http://bob.github.io/CMIP7_CVs/_context_")
# rr.test_redirect("cmip:kik")

# Restore defaults
rr.restore_defaults()


Added redirect: cmip: | cmip:.* -> cmip7.com/\1


InvalidSchema: No connection adapters were found for 'cmip:kik'

In [3]:
import requests
from requests.adapters import HTTPAdapter
from urllib.parse import urlparse
import re

from rich.console import Console, Group
from rich.table import Table
from rich.panel import Panel


class PrefixResolvingSession(requests.Session):
    def __init__(self, prefix_map, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.prefix_map = prefix_map

    def request(self, method, url, *args, **kwargs):
        if ":" in url and not url.startswith(("http://", "https://")):
            prefix, path = url.split(":", 1)
            base = self.prefix_map.get(prefix)
            if base:
                url = base + path
            else:
                raise ValueError(f"Unknown prefix: {prefix}")
        return super().request(method, url, *args, **kwargs)


class RequestRedirector:
    def __init__(self, redirect_rules={}, prefix_map=None):
        # self.redirect_rules = redirect_rules or {}
        self.redirect_rules = {
            host: [
                {
                    **rule,
                    "regex_in": re.compile(rule["regex_in"]) 
                    # if isinstance(rule.get("regex_in"), str) else rule["regex_in"]
                }
                for rule in rules
            ]
            for host, rules in redirect_rules.items()
        }
        self.default_rules = self.redirect_rules.copy()

        self.prefix_map = prefix_map or {}
        self.default_prefix_map = self.prefix_map.copy()

        self.default_session = requests.Session
        self._patch_requests()
        self.list_redirects()

    def _patch_requests(self):
        session = PrefixResolvingSession(self.prefix_map)
        adapter = URLRewritingAdapter(self.redirect_rules)
        
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        requests.sessions.Session = lambda: session

    def restore_defaults(self):
        self.redirect_rules = self.default_rules.copy()
        self.prefix_map = self.default_prefix_map.copy()
        requests.sessions.Session = self.default_session
        print("Restored default session and redirect rules.")

    def add_redirect(self, host, regex_in, regex_out):
        self.redirect_rules.setdefault(host, []).append({
            "regex_in": re.compile(regex_in),
            "regex_out": regex_out
        })
        self._patch_requests()
        print(f"Added redirect: {host} | {regex_in} -> {regex_out}")

    def add_prefix(self, prefix, base_url):
        self.prefix_map[prefix] = base_url
        self._patch_requests()
        print(f"Added prefix: {prefix}: → {base_url}")

    def list_redirects(self):
        console = Console()
        table = Table(title="Regex-Based URL Redirects (by Host)", expand=True)

        table.add_column("Host", justify="center", style="cyan", no_wrap=True)
        table.add_column("Rules", justify="left", style="magenta")

        for host, rules in self.redirect_rules.items():
            host_panel = Panel(host, expand=True, width=30)
            rule_panels = [
                Panel(f"[bold]Match:[/bold] {rule['regex_in']}\n[bold]Replace:[/bold] {rule['regex_out']}", expand=True)
                for rule in rules
            ]
            rules_group = Group(*rule_panels)
            table.add_row(host_panel, rules_group)

        if self.prefix_map:
            prefix_panel = Panel(
                "\n".join(f"[cyan]{k}[/cyan]: {v}" for k, v in self.prefix_map.items()),
                title="Prefix Resolvers",
                border_style="green"
            )
            console.print(prefix_panel)

        console.print(table)

    def test_redirect(self, url):
        response = requests.get(url, verify=False)
        print(f"Original URL: {url}")
        print(f"Final URL: {response.url}")
        print(f"Status Code: {response.status_code}")
        print(response.text[:300])


class URLRewritingAdapter(HTTPAdapter):
    def __init__(self, redirect_rules, **kwargs):
        self.redirect_rules = redirect_rules
        super().__init__(**kwargs)

    def send(self, request, **kwargs):
        parsed = urlparse(request.url)
        host = parsed.hostname
        # path = parsed.path

        rules = self.redirect_rules.get(host, [])
        for rule in rules:
            # new_path = re.sub(rule['regex_in'], rule['regex_out'], path)
            # if new_path != path:
            # request.url = re.sub(rule['regex_in'], rule['regex_out'], request.url)
            request.url = rule['regex_in'].sub( rule['regex_out'], request.url)
            # request.url.replace(path, new_path)
            # print(host,request.url)
            request.headers['Host'] = urlparse(request.url).hostname  # Preserve original host
            # print('---',request.headers['Host'],request.url,parsed)
            # break

        return super().send(request, **kwargs)


In [4]:
rr = RequestRedirector(
    redirect_rules={
        "bob.github.io": [
            {"regex_in": re.compile(r"bob\.github\.io/cmip_CVs/(.*)"), "regex_out": r"wcrp-cmip.github.io/CMIP7_CVs/\1"},
            {"regex_in": re.compile(r"bob\.github\.io/(.*)"), "regex_out": r"wcrp-cmip.github.io/\1"},
        ]
    },
    prefix_map={
        "cmip": "https://wcrp-cmip.github.io/CMIP7_CVs/",
        "bob": "https://bob.github.io/",
    }
)

rr.add_prefix("gh", "https://github.com/")
rr.add_redirect("example.com", r"/old/(.*)", r"/new/\1")

rr.list_redirects()

rr.test_redirect("https://bob.github.io/cmip_CVs/graph.jsonld")
rr.test_redirect("cmip:_context_")
rr.test_redirect("bob:WCRP-universe/activity/graph.jsonld")
# rr.test_redirect("gh:openai/openai-python")


Added prefix: gh: → https://github.com/
Added redirect: example.com | /old/(.*) -> /new/\1


Original URL: https://bob.github.io/cmip_CVs/graph.jsonld
Final URL: https://wcrp-cmip.github.io/CMIP7_CVs/graph.jsonld
Status Code: 404
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-type" content="text/html; charset=utf-8">
    <meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src 'unsafe-inline'; img-src data:; connect-src 'self'">
    <title>Site not found &middot; GitHub Pages</title>
 




Original URL: cmip:_context_
Final URL: https://wcrp-cmip.github.io/CMIP7_CVs/_context_
Status Code: 404
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-type" content="text/html; charset=utf-8">
    <meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src 'unsafe-inline'; img-src data:; connect-src 'self'">
    <title>Site not found &middot; GitHub Pages</title>
 
Original URL: bob:WCRP-universe/activity/graph.jsonld
Final URL: https://wcrp-cmip.github.io/WCRP-universe/activity/graph.jsonld
Status Code: 200
{
  "@context": {
    "@base": "https://wcrp-cmip.github.io/WCRP-universe/activity/",
    "@vocab": "https://wcrp-cmip.github.io/WCRP-universe/activity/",
    "entries": "@none",
    "id": "@id",
    "type": "@type",
    "cf": "https://wcrp-cmip.github.io/CF/",
    "cmip6plus": "https://wcrp-cmip.gi




In [7]:
requests.get("https://bob:WCRP-universe/activity/graph.jsonld").json()

InvalidURL: Failed to parse: https://bob:WCRP-universe/activity/graph.jsonld