Skip to content

Commit

Permalink
enhance url manipulation
Browse files Browse the repository at this point in the history
  • Loading branch information
_eternal_flame authored and _eternal_flame committed May 30, 2018
1 parent 1ecc5ef commit f4b58b2
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
22 changes: 19 additions & 3 deletions pxelem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import downloader
import bs4
import config
import re

author_cache = {}

Expand Down Expand Up @@ -34,13 +35,15 @@ def __str__(self):


class PixlvUrl():
def __init__(self, url, base=None, info={}, use_sessid=True):
def __init__(self, url, base=None, info={}, use_sessid=True, use_english=True):
self.info = info
self.use_sessid = use_sessid
if base:
self.url = urlparser.urlparse(urlparser.urljoin(base, url))
else:
self.url = urlparser.urlparse(url)
if use_english:
self.addquerystring("lang", "en")

def addinfo(self, key, elem):
self.info[key] = elem
Expand Down Expand Up @@ -73,10 +76,23 @@ def geturl(self):
return self.url.geturl()

def geturi(self):
    """Return the path of the URL, followed by ``?query`` when a query exists.

    An empty path is reported as ``/`` so the result is always usable as a
    request target, e.g. ``https://www.example.com`` yields ``/``.
    """
    parsed = self.url
    # Only the path-normalised returns are kept; the superseded
    # ``self.url.path`` returns would make them unreachable.
    target = parsed.path if parsed.path != "" else "/"
    if parsed.query == "":
        return target
    return target + "?" + parsed.query

def getquerydict(self):
    """Return the URL's query string parsed into a dict of value lists."""
    query = self.url.query
    return urlparser.parse_qs(query)

def addquerystring(self, key, elem):
    """Set the query parameter ``key`` to ``elem`` on ``self.url``.

    If ``key`` is already present, its first occurrence is overwritten in
    place (so parameter order is preserved) and any later duplicates are
    dropped; otherwise ``key=elem`` is appended to the query. All other
    parameters, the path and the fragment are left untouched.

    Args:
        key: Parameter name, inserted verbatim (no percent-encoding).
        elem: Parameter value as a string, inserted verbatim.
    """
    prefix = key + "="
    pair = prefix + elem
    query = self.url.query
    segments = query.split("&") if query else []

    updated = []
    replaced = False
    for segment in segments:
        # Compare whole ``name=`` prefixes of each segment so that, for
        # example, key "id" never touches "uid=...", and keys containing
        # regex metacharacters are handled like any other text.
        if segment.startswith(prefix):
            if not replaced:
                updated.append(pair)
                replaced = True
            continue
        updated.append(segment)
    if not replaced:
        updated.append(pair)

    # Rebuild only the query component; editing the full URL string would
    # append new parameters after a "#fragment" instead of into the query.
    self.url = self.url._replace(query="&".join(updated))
33 changes: 33 additions & 0 deletions tests/test_urlparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
import sys

# Put the directory above this test file's own directory at the front of
# sys.path so that the `pxelem` module can be imported by the tests below.
parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parentdir)

from pxelem import PixlvUrl

class TestUrlParse():
    """Checks PixlvUrl parsing, query-string editing and port detection."""

    def test_simple_url(self):
        """A bare host URL exposes scheme and host, no query, and "/" as URI."""
        parsed = PixlvUrl("https://www.example.com", use_english=False)
        assert parsed.getscheme() == "https"
        assert parsed.gethost() == "www.example.com"
        assert parsed.getquerydict() == {}
        assert parsed.geturi() == "/"

    def test_query_string(self):
        """Parameters can be read, added, and then overwritten in place."""
        parsed = PixlvUrl(
            "https://www.example.com/query.php?param1=abc&param2=123",
            use_english=False,
        )
        assert parsed.getquerydict()["param1"][0] == "abc"

        # Adding a key that is not present yet.
        parsed.addquerystring("param3", "456")
        assert parsed.getquerydict()["param3"][0] == "456"

        # Setting the same key again must replace the value, not duplicate it.
        parsed.addquerystring("param3", "789")
        assert parsed.getquerydict()["param3"][0] == "789"
        expected = "https://www.example.com/query.php?param1=abc&param2=123&param3=789"
        assert parsed.geturl() == expected

    def test_port_detection(self):
        """The port follows the scheme default unless the URL names one."""
        expectations = (
            ("https://www.example.com", 443),
            ("http://www.example.com", 80),
            ("https://www.example.com:444", 444),
        )
        for address, port in expectations:
            parsed = PixlvUrl(address)
            assert parsed.getport() == port

0 comments on commit f4b58b2

Please sign in to comment.