Skip to content

Commit

Permalink
Add origin support. Wrap furl.set() to guard against side effects fro…
Browse files Browse the repository at this point in the history
…m all exceptions.
  • Loading branch information
gruns committed Jul 17, 2016
1 parent 6d7d2c1 commit 3d75e68
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 65 deletions.
18 changes: 15 additions & 3 deletions API.md
Expand Up @@ -23,7 +23,7 @@ scheme://username:password@host:port/path?query#fragment
separated by an optional `?` separator.


### Scheme, Username, Password, Host, Port, and Network Location
### Scheme, Username, Password, Host, Port, Network Location, and Origin

__scheme__, __username__, __password__, and __host__ are strings or
None. __port__ is an integer or None.
Expand All @@ -47,7 +47,7 @@ None
```

__netloc__ is the string combination of __username__, __password__, __host__,
and __port__, not including __port__ if it is None or the default port for the
and __port__, not including __port__ if it's None or the default port for the
provided __scheme__.

```python
Expand All @@ -61,6 +61,17 @@ provided __scheme__.
'user:pass@www.google.com:99'
```

__origin__ is the string combination of __scheme__, __host__, and __port__, not
including __port__ if it's None or the default port for the provided __scheme__.

```python
>>> furl('http://www.google.com/').origin
'http://www.google.com'

>>> furl('http://www.google.com:99/').origin
'http://www.google.com:99'
```


### Path

Expand Down Expand Up @@ -298,7 +309,6 @@ is application/x-www-form-urlencoded (`+` not `%20`).
```



### Fragment

URL fragments in furl are Fragment objects that have a Path __path__ and Query
Expand Down Expand Up @@ -355,6 +365,7 @@ True
'http://www.google.com/#!a=dict&of=args'
```


### Encoding

Furl handles encoding for you, and furl's philosophy on encoding is simple: URL
Expand Down Expand Up @@ -452,6 +463,7 @@ __set()__ sets items of a furl object with the optional arguments
* __path__: List of path segments or a path string to adopt.
* __scheme__: Scheme string to adopt.
* __netloc__: Network location string to adopt.
* __origin__: Origin string to adopt.
* __query__: Query string to adopt.
* __query_params__: A dictionary of query keys and values to adopt.
* __fragment__: Fragment string to adopt.
Expand Down
130 changes: 80 additions & 50 deletions furl/furl.py
Expand Up @@ -1286,6 +1286,29 @@ def netloc(self, netloc):
self.username = None if username is None else unquote(username)
self.password = None if password is None else unquote(password)

@property
def origin(self):
    """Return the origin string, '<scheme>://<host>[:<port>]'.

    The scheme and host are rendered as empty strings when unset. The
    host is passed through idna_encode(). The port is appended only if
    it is truthy and differs from the DEFAULT_PORTS entry for the
    current scheme. Username and password are never included.
    """
    scheme = self.scheme or ''
    host = idna_encode(self.host) or ''
    port = ''
    if self.port and self.port != DEFAULT_PORTS.get(self.scheme):
        port = ':%s' % self.port
    return '%s://%s%s' % (scheme, host, port)

@origin.setter
def origin(self, origin):
    """Adopt the scheme, host, and port found in the string <origin>.

    Accepted forms are '<scheme>://<host>', '<scheme>://<host>:<port>',
    '<host>', and '<host>:<port>'. Components absent from <origin> are
    left unchanged, as are the username and password. The port is
    handed to the port setter as the parsed string.

    Bracketed IPv6 literals, e.g. 'http://[::1]:8080', are split at the
    closing bracket rather than at the first colon, so the colons
    inside the address are never mistaken for the port separator.

    Raises:
      ValueError if a bracketed host is missing its closing ']' or is
      followed by anything other than ':<port>'. The scheme, host, and
      port setters may raise on values they reject.
    """
    # Parse everything before assigning anything so that an origin
    # rejected here leaves scheme, host, and port untouched.
    has_scheme = '://' in origin
    if has_scheme:
        scheme, host_port = origin.split('://', 1)
    else:
        host_port = origin

    has_port = False
    if host_port.startswith('['):
        # IPv6 literal: the host runs through the closing bracket.
        end = host_port.find(']')
        if end == -1:
            raise ValueError(
                "Invalid origin %r: missing closing ']'." % origin)
        host, rest = host_port[:end + 1], host_port[end + 1:]
        if rest.startswith(':'):
            port, has_port = rest[1:], True
        elif rest:
            raise ValueError(
                "Invalid origin %r: unexpected text after ']'." % origin)
    elif ':' in host_port:
        host, port = host_port.split(':', 1)
        has_port = True
    else:
        host = host_port

    # Assign in scheme, host, port order.
    if has_scheme:
        self.scheme = scheme
    self.host = host
    if has_port:
        self.port = port

@property
def url(self):
return self.tostr()
Expand Down Expand Up @@ -1343,18 +1366,18 @@ def add(self, args=_absent, path=_absent, fragment_path=_absent,
return self

def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent,
netloc=_absent, fragment_path=_absent, fragment_args=_absent,
fragment_separator=_absent, host=_absent, port=_absent,
query=_absent, query_params=_absent, username=_absent,
password=_absent):
"""
Set components of a url and return this furl instance, <self>.
netloc=_absent, origin=_absent, fragment_path=_absent,
fragment_args=_absent, fragment_separator=_absent, host=_absent,
port=_absent, query=_absent, query_params=_absent,
username=_absent, password=_absent):
"""Set components of a url and return this furl instance, <self>.
If any overlapping, and hence possibly conflicting, parameters
are provided, appropriate UserWarnings will be issued. The
groups of parameters that could potentially overlap are
<netloc> and (<host> or <port>)
<scheme> and <origin>
<origin>, <netloc>, and/or (<host> or <port>)
<fragment> and (<fragment_path> and/or <fragment_args>)
any two or all of <query>, <args>, and/or <query_params>
Expand All @@ -1378,6 +1401,7 @@ def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent,
fragment: Fragment string to adopt.
scheme: Scheme string to adopt.
netloc: Network location string to adopt.
origin: Origin string (scheme, host, and port) to adopt.
query: Query string to adopt.
query_params: A dictionary of query keys and values or list of
key:value items to adopt.
Expand All @@ -1394,73 +1418,79 @@ def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent,
password: Password string to adopt.
Raises:
ValueError on invalid port.
UserWarning if <netloc> and (<host> and/or <port>) are
provided.
UserWarning if <query>, <args>, and/or <query_params> are
UserWarning if <scheme> and <origin> are provided.
UserWarning if <origin>, <netloc> and/or (<host> and/or <port>) are
provided.
UserWarning if <query>, <args>, and/or <query_params> are provided.
UserWarning if <fragment> and (<fragment_path>,
<fragment_args>, and/or <fragment_separator>) are provided.
Returns: <self>.
"""
netloc_present = netloc is not _absent
if (netloc_present and (host is not _absent or port is not _absent)):
s = ('Possible parameter overlap: <netloc> and <host> and/or '
'<port> provided. See furl.set() documentation for more '
'details.')
present = lambda v: v is not _absent
if present(scheme) and present(origin):
s = ('Possible parameter overlap: <scheme> and <origin>. See '
'furl.set() documentation for more details.')
warnings.warn(s, UserWarning)
l = [present(netloc), present(origin), present(host) or present(port)]
if sum(l) >= 2:
s = ('Possible parameter overlap: <origin>, <netloc> and/or '
'(<host> and/or <port>) provided. See furl.set() '
'documentation for more details.')
warnings.warn(s, UserWarning)
if ((args is not _absent and query is not _absent) or
(query is not _absent and query_params is not _absent) or
(args is not _absent and query_params is not _absent)):
if sum(present(p) for p in [args, query, query_params]) >= 2:
s = ('Possible parameter overlap: <query>, <args>, and/or '
'<query_params> provided. See furl.set() documentation for '
'more details.')
warnings.warn(s, UserWarning)
if (fragment is not _absent and
(fragment_path is not _absent or fragment_args is not _absent or
(fragment_separator is not _absent))):
l = [fragment_path, fragment_args, fragment_separator]
if present(fragment) and any(present(p) for p in l):
s = ('Possible parameter overlap: <fragment> and '
'(<fragment_path>and/or <fragment_args>) or <fragment> '
'and <fragment_separator> provided. See furl.set() '
'documentation for more details.')
warnings.warn(s, UserWarning)

# Avoid side effects if exceptions are raised.
oldnetloc, oldport = self.netloc, self.port
# Guard against side effects on exception.
original_url = self.url
try:
if username is not _absent:
self.username = username
if password is not _absent:
self.password = password
if netloc is not _absent:
# Raises ValueError on invalid port or malformed IP.
self.netloc = netloc
if origin is not _absent:
# Raises ValueError on invalid port or malformed IP.
self.origin = origin
if scheme is not _absent:
self.scheme = scheme
if host is not _absent:
# Raises ValueError on invalid host or malformed IP.
self.host = host
if port is not _absent:
self.port = port # Raises ValueError on invalid port.
except ValueError:
self.netloc, self.port = oldnetloc, oldport

if path is not _absent:
self.path.load(path)
if query is not _absent:
self.query.load(query)
if args is not _absent:
self.query.load(args)
if query_params is not _absent:
self.query.load(query_params)
if fragment is not _absent:
self.fragment.load(fragment)
if fragment_path is not _absent:
self.fragment.path.load(fragment_path)
if fragment_args is not _absent:
self.fragment.query.load(fragment_args)
if fragment_separator is not _absent:
self.fragment.separator = fragment_separator
except:
self.load(original_url)
raise

if username is not _absent:
self.username = username
if password is not _absent:
self.password = password
if scheme is not _absent:
self.scheme = scheme
if host is not _absent:
self.host = host

if path is not _absent:
self.path.load(path)
if query is not _absent:
self.query.load(query)
if args is not _absent:
self.query.load(args)
if query_params is not _absent:
self.query.load(query_params)
if fragment is not _absent:
self.fragment.load(fragment)
if fragment_path is not _absent:
self.fragment.path.load(fragment_path)
if fragment_args is not _absent:
self.fragment.query.load(fragment_args)
if fragment_separator is not _absent:
self.fragment.separator = fragment_separator
return self

def remove(self, args=_absent, path=_absent, fragment=_absent,
Expand Down
70 changes: 58 additions & 12 deletions tests/test_furl.py
Expand Up @@ -996,11 +996,16 @@ def setUp(self):
warnings.simplefilter("always")

def _param(self, url, key, val):
# Note: urlparse.urlsplit() doesn't separate the query from the
# path for all schemes, only those schemes in the list
# urlparse.uses_query. So, as a result of using
# urlparse.urlsplit(), this little helper function only works
# when provided URLs whos schemes are also in
# urlsplit() only parses the query for schemes in urlparse.uses_query,
# so switch to 'http' (a scheme in urlparse.uses_query) for
# urlparse.urlsplit().
if '://' in url:
url = 'http://%s' % url.split('://', 1)[1]

# Note: urlparse.urlsplit() doesn't separate the query from the path
# for all schemes, only those schemes in the list urlparse.uses_query.
# So, as a result of using urlparse.urlsplit(), this little helper
function only works when provided URLs whose schemes are also in
# urlparse.uses_query.
items = urllib.parse.parse_qsl(urllib.parse.urlsplit(url).query, True)
return (key, val) in items
Expand Down Expand Up @@ -1432,7 +1437,7 @@ def test_hosts(self):
with self.assertRaises(ValueError):
f = furl.furl('http://google.com/').set(host=host)

def test_netlocs(self):
def test_netloc(self):
f = furl.furl('http://pumps.com/')
netloc = '1.2.3.4.5.6:999'
f.netloc = netloc
Expand All @@ -1454,7 +1459,7 @@ def test_netlocs(self):
with self.assertRaises(ValueError):
f.netloc = '0:0:0:0:0:0:0:1]'

# Invalid ports.
# Invalid ports should raise an exception.
with self.assertRaises(ValueError):
f.netloc = '[0:0:0:0:0:0:0:1]:alksdflasdfasdf'
with self.assertRaises(ValueError):
Expand All @@ -1464,6 +1469,35 @@ def test_netlocs(self):
assert f.host == '[0:0:0:0:0:0:0:1:1:1:1:1:1:1:1:9999999999999]'
assert f.port == 888

def test_origin(self):
assert furl.furl().set(host='slurp.ru').origin == '://slurp.ru'
assert furl.furl('http://pep.ru:83/yep').origin == 'http://pep.ru:83'
assert furl.furl().set(origin='pep://yep.ru').origin == 'pep://yep.ru'
f = furl.furl('http://user:pass@pumps.com/path?query#fragemtn')
assert f.origin == 'http://pumps.com'

f = furl.furl('none://ignored/lol?sup').set(origin='sup://yep.biz:99')
assert f.url == 'sup://yep.biz:99/lol?sup'

# Username and password are unaffected.
f = furl.furl('http://user:pass@slurp.com')
f.origin = 'ssh://horse-machine.de'
assert f.url == 'ssh://user:pass@horse-machine.de'

# Malformed IPv6 should raise an exception because
# urlparse.urlsplit() raises an exception in Python v2.7+.
if PYTHON_27PLUS:
with self.assertRaises(ValueError):
f.origin = '[0:0:0:0:0:0:0:1'
with self.assertRaises(ValueError):
f.origin = 'http://0:0:0:0:0:0:0:1]'

# Invalid ports should raise an exception.
with self.assertRaises(ValueError):
f.origin = '[0:0:0:0:0:0:0:1]:alksdflasdfasdf'
with self.assertRaises(ValueError):
f.origin = 'http://pump2pump.org:777777777777'

def test_ports(self):
# Default port values.
assert furl.furl('http://www.pumps.com/').port == 80
Expand Down Expand Up @@ -1597,17 +1631,29 @@ def test_set(self):
f = furl.furl('http://pumps.com')
warnings.simplefilter("always")

# Host, port, and netloc overlap - host and port take
# precedence.
# Scheme, origin overlap. Scheme takes precedence.
with warnings.catch_warnings(record=True) as w1:
f.set(scheme='hi', origin='bye://sup.sup')
assert len(w1) == 1 and issubclass(w1[0].category, UserWarning)
assert f.scheme == 'hi'

# Netloc, origin, host and/or port. Host and port take precedence.
with warnings.catch_warnings(record=True) as w1:
f.set(netloc='dumps.com:99', origin='sup://pumps.com:88')
assert len(w1) == 1 and issubclass(w1[0].category, UserWarning)
with warnings.catch_warnings(record=True) as w1:
f.set(netloc='dumps.com:99', host='ohay.com')
assert len(w1) == 1 and issubclass(w1[0].category, UserWarning)
f.host == 'ohay.com'
f.port == 99
assert f.host == 'ohay.com'
assert f.port == 99
with warnings.catch_warnings(record=True) as w2:
f.set(netloc='dumps.com:99', port=88)
assert len(w2) == 1 and issubclass(w2[0].category, UserWarning)
f.port == 88
assert f.port == 88
with warnings.catch_warnings(record=True) as w2:
f.set(origin='http://dumps.com:99', port=88)
assert len(w2) == 1 and issubclass(w2[0].category, UserWarning)
assert f.port == 88
with warnings.catch_warnings(record=True) as w3:
f.set(netloc='dumps.com:99', host='ohay.com', port=88)
assert len(w3) == 1 and issubclass(w3[0].category, UserWarning)
Expand Down

0 comments on commit 3d75e68

Please sign in to comment.