In [1]:
from urllib import parse as urlparse

# Resolve an absolute-path reference ('/bar') against a base URL and show
# the joined result.
url1 = 'http://example.com/foo'
url2 = urlparse.urljoin(base=url1, url='/bar')
print(url2)

In [2]:
import string
from hypothesis.strategies import (
    composite, tuples, text, integers, one_of, lists, sampled_from, just
)

@composite
def schemes(draw):
    """Draw a scheme string: one ASCII letter followed by any run of
    ASCII letters, digits, '+', '.' or '-' (the run may be empty)."""
    leading_letter = draw(sampled_from(string.ascii_letters))
    trailing_alphabet = string.ascii_letters + string.digits + '+.-'
    remainder = draw(text(alphabet=trailing_alphabet))
    return ''.join((leading_letter, remainder))

# Dotted-quad IPv4 addresses: four decimal octets (0-255) joined with '.'.
_ipv4_octets = integers(min_value=0, max_value=255).map(str)
ipv4_addresses = tuples(
    _ipv4_octets, _ipv4_octets, _ipv4_octets, _ipv4_octets
).map('.'.join)

# Runs (possibly empty) of unreserved characters: ASCII letters, digits
# and '-', '.', '_', '~'.
_unreserved_alphabet = string.ascii_letters + string.digits + '-._~'
iunreserveds = text(alphabet=_unreserved_alphabet)

# Percent-encoded octets: '%' followed by exactly two hex digits.
_hex_alphabet = string.digits + 'ABCDEFabcdef'
pct_encodeds = text(alphabet=_hex_alphabet, min_size=2, max_size=2).map(
    lambda hex_pair: '%' + hex_pair
)

# Registered names: any number of unreserved runs and percent-encoded
# octets concatenated together (the result may be empty).
_reg_name_pieces = one_of(iunreserveds, pct_encodeds)
ireg_names = lists(_reg_name_pieces).map(lambda pieces: ''.join(pieces))

# An authority is either a dotted-quad IPv4 address or a registered name.
iauthorities = one_of(ipv4_addresses, ireg_names)

# Runs (possibly empty) of sub-delimiter characters.
sub_delims = text(alphabet="!$&'()*+,;=")

# Path character pieces; the "_nc" variant is the same set without ":".
pchar = one_of(
    iunreserveds,
    pct_encodeds,
    sub_delims,
    just(":"),
    just("@"),
)
pchar_nc = one_of(
    iunreserveds,
    pct_encodeds,
    sub_delims,
    just("@"),
)

# Path segments built by concatenating path-character pieces.
#
# The "_nz" (non-zero-length) variants must never be empty.  `min_size=1`
# alone does not guarantee that: several of the pieces (`iunreserveds`,
# `sub_delims`) are `text()` strategies that can themselves be the empty
# string, so a one-element list can still join to "".  Filtering on
# truthiness drops those empty results.
segment_nz_nc = lists(pchar_nc, min_size=1).map(''.join).filter(bool)
segment_nz = lists(pchar, min_size=1).map(''.join).filter(bool)
# A plain segment is allowed to be empty.
segment = lists(pchar, min_size=0).map(''.join)

def _followed_by_segments(head):
    """Strategy producing `head` plus zero or more '/'-prefixed segments."""
    tail = lists(segment.map(lambda sn: "/" + sn)).map("".join)
    return tail.map(lambda ss: head + ss)


# Candidate paths, one of:
#   * the empty path,
#   * zero or more "/segment" pieces,
#   * a `segment_nz` followed by zero or more "/segment" pieces,
#   * a `segment_nz_nc` followed by zero or more "/segment" pieces.
paths = one_of(
    just(""),
    lists(segment.map(lambda s: "/" + s)).map("".join),
    segment_nz.flatmap(_followed_by_segments),
    segment_nz_nc.flatmap(_followed_by_segments),
)

@composite
def iris(draw):
    """Draw an IRI string composed from a generated scheme, authority and path.

    Only paths that begin with "/" are used.  `rfc3987` must already be
    imported by the time a value is drawn from this strategy.
    """
    scheme = draw(schemes())
    authority = draw(iauthorities)
    rooted_paths = paths.filter(lambda p: p.startswith("/"))
    path = draw(rooted_paths)
    return rfc3987.compose(scheme=scheme, authority=authority, path=path)

In [3]:
import rfc3987
from hypothesis import given

@given(paths.filter(lambda p: p.startswith("/")))
def test_path_generation(path):
    """Every generated "/"-rooted path, attached to a fixed scheme and
    authority, must compose into a string that matches the IRI rule."""
    parts = dict(scheme="http", authority="avengerpenguin.com", path=path)
    iri = rfc3987.compose(**parts)
    print(iri)
    assert rfc3987.match(iri, rule='IRI')

# Collects every IRI that passed validation so a sample can be shown later.
generated_iris = []


@given(iris())
def test_iri_generation(iri):
    """Each generated IRI must match the IRI rule; passing ones are recorded."""
    matched = rfc3987.match(iri, rule='IRI')
    assert matched
    generated_iris.append(iri)
    
# Run the property test so that `generated_iris` gets populated, then print
# a few of the collected IRIs as a visual sanity check.
test_iri_generation()
import random, json
# random.sample raises ValueError when asked for more items than the
# population contains, so cap the sample size at what was actually collected.
for iri in random.sample(generated_iris, min(10, len(generated_iris))):
    print(iri)

gFUXi7://58.82.92.6//
h8vPS+73-hLEIxV+FUXi7://58.82.92.6//-2f@/@:@!+!''%ad@%8F@Ft//@
a://0.0.0.0/
x+b://213.83.45.23///////:%2c:~ntc%A5)/@%A8
a://0.0.0.0/
b162://1.0.1.7/
s:////%7b%e6@hu9;),&*++*&,)=*%63nxd0XJk:%09
h8vPS+73-hLEIxV+FUXi7://58.82.92.6/
h8vPS+73-hLEIxV+FUXi7://58.82.92.6//-2f@/@:@!+!''
h://58.82.92.6//


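See https://tools.ietf.org/html/rfc1034#section-3.1 for the label and domain-name length limits (63 and 255 octets). The letter/digit/hyphen label grammar used below is the "preferred name syntax" from section 3.5 of the same RFC.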

In [4]:
# Building blocks for labels: non-empty runs of letters, non-empty runs of
# letters/digits, and "either such a run or a single hyphen".
let = text(alphabet=string.ascii_letters, min_size=1)
let_dig = text(alphabet=string.ascii_letters + string.digits, min_size=1)
let_dig_hyp = one_of(let_dig, just("-"))


def _labels_starting_with(initial):
    """Strategy for labels beginning with `initial` and ending in a let_dig run."""
    def _labels_ending_with(end):
        middles = lists(let_dig_hyp).map("".join)
        return middles.map(lambda middle: initial + middle + end)
    return let_dig.flatmap(_labels_ending_with)


# A subdomain label: letter run, then any letter/digit/hyphen pieces, then a
# letter/digit run.
subdomains = let.flatmap(_labels_starting_with)

# A domain: one or more labels joined with dots.
domains = lists(subdomains, min_size=1).map(lambda labels: ".".join(labels))

import re

@given(domains)
def test_domain_generation(domain):
    """Generated domains must be dot-separated labels of letters, digits and
    inner hyphens, with the final label between 2 and 63 characters long."""
    # Raw string: in a plain literal "\." is an invalid escape sequence, which
    # newer Python versions warn about.  The pattern itself is unchanged.
    assert re.match(
        r'^(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)*[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9]$',
        domain
    )

# The unrestricted `domains` strategy is run here with an assertion failure
# caught and displayed instead of aborting the notebook.
try:
    test_domain_generation()
except AssertionError as failure:
    print(failure)

Falsifying example: test_domain_generation(
    domain='A----------00000000000000000000000000000000000000000000000000000',
)



In [6]:
def _fits_label_limit(label):
    """True when the label is between 1 and 63 characters long."""
    return 0 < len(label) < 64


def _fits_domain_limit(name):
    """True when the full name is between 1 and 255 characters long."""
    return 0 < len(name) < 256


# Length-limited variants of the label and domain strategies.
subdomains_max = subdomains.filter(_fits_label_limit)
domains_max = (
    lists(subdomains_max, min_size=1)
    .map(".".join)
    .filter(_fits_domain_limit)
)

import re

@given(domains_max)
def test_limited_domain_generation(domain):
    """Length-limited domains must be dot-separated labels of letters, digits
    and inner hyphens, with the final label between 2 and 63 characters long."""
    # Raw string: in a plain literal "\." is an invalid escape sequence, which
    # newer Python versions warn about.  The pattern itself is unchanged.
    assert re.match(
        r'^(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)*[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9]$',
        domain
    )

# @composite
# def urls(draw):
#     parts = {
#         'scheme': draw(one_of(just("http"), just("https"))),
#         'authority': draw(iauthorities),
#         'path': draw(paths.filter(lambda p: p.startswith("/"))),
#     }
#     return rfc3987.compose(**parts)

# Run the length-limited domain property test right away, in this cell.
test_limited_domain_generation()