Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
lpeg_patterns/lpeg_patterns/uri.lua
Go to file
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
143 lines (118 sloc)
4.16 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- URI | |
| -- RFC 3986 | |
| local lpeg = require "lpeg" | |
| local P = lpeg.P | |
| local S = lpeg.S | |
| local C = lpeg.C | |
| local Cc = lpeg.Cc | |
| local Cg = lpeg.Cg | |
| local Cs = lpeg.Cs | |
| local Ct = lpeg.Ct | |
| local util = require "lpeg_patterns.util" | |
| local core = require "lpeg_patterns.core" | |
| local ALPHA = core.ALPHA | |
| local DIGIT = core.DIGIT | |
| local HEXDIG = core.HEXDIG | |
| local IPv4address = require "lpeg_patterns.IPv4".IPv4address | |
| local IPv6address = require "lpeg_patterns.IPv6".IPv6address | |
-- Table of exported patterns; it is what this file returns.
local _M = {}
-- sub-delims (RFC 3986 section 2.2)
_M.sub_delims = S("!$&'()*+,;=")
-- unreserved (RFC 3986 section 2.3): letters, digits and "-", ".", "_", "~".
-- Kept private; the rules below build on it.
local unreserved = ALPHA + DIGIT + S("-._~")
-- pct-encoded (RFC 3986 section 2.1)
-- Turns the numeric value of an escape into its normalised textual form.
-- `octet` is whatever util.read_hex produced from the two hex digits; it is
-- passed straight to string.char, so it is presumably a byte value (0-255).
local function normalise_pct_encoded(octet)
	local char = string.char(octet)
	if not unreserved:match(char) then
		-- stays escaped, but with upper-case hex digits (6.2.2.1)
		return string.format("%%%02X", octet)
	end
	-- escapes of unreserved characters are always decoded (2.3)
	return char
end
-- Matches "%" plus two hex digits; the capture is the normalised string.
_M.pct_encoded = (P"%" * ((HEXDIG * HEXDIG) / util.read_hex)) / normalise_pct_encoded
-- scheme (3.1): a letter followed by letters, digits, "+", "-" or ".".
-- The matched text is captured in lower case.
local scheme_rest = ALPHA + DIGIT + S"+-."
_M.scheme = (ALPHA * scheme_rest^0) / string.lower
-- userinfo (3.2.1): may be empty; captured with the pct-encoded
-- normalisation substituted in.
local userinfo_char = unreserved + _M.pct_encoded + _M.sub_delims + P":"
_M.userinfo = Cs(userinfo_char^0)
-- Host 3.2.2
-- Metatable shared by every IPvFuture value created by new_IPvFuture.
local IPvFuture_mt = {
	__name = "lpeg_patterns.IPvFuture";
}
-- Renders the value as "v<version in lower-case hex>.<address text>".
function IPvFuture_mt:__tostring()
	return string.format("v%x.%s", self.version, self.string)
end
-- Builds an IPvFuture value.
-- @param version  numeric version (formatted with %x by __tostring)
-- @param address  the address text following the "."
-- @return table with fields `version` and `string`, using IPvFuture_mt
-- The second parameter is deliberately not called `string`: that name would
-- shadow the standard `string` library inside this function.
local function new_IPvFuture(version, address)
	return setmetatable({version=version, string=address}, IPvFuture_mt)
end
-- IPvFuture: "v", one or more hex digits, ".", then one or more of
-- unreserved / sub-delims / ":". The captures (version as converted by
-- util.read_hex, and the address text) are handed to new_IPvFuture.
local IPvFuture_version = HEXDIG^1 / util.read_hex
local IPvFuture_address = C((unreserved + _M.sub_delims + P":")^1)
local IPvFuture = (S"vV" * IPvFuture_version * P"." * IPvFuture_address) / new_IPvFuture
-- RFC 6874: IPv6 zone identifiers
local ZoneID = Cs((unreserved + _M.pct_encoded)^1)
-- Stores the zone id on the parsed address and passes the address on.
-- The "%25" ZoneID part is optional, so `zone` is nil when it is absent;
-- setzoneid is still called in that case.
local function attach_zoneid(address, zone)
	address:setzoneid(zone)
	return address
end
local IPv6addrz = (IPv6address * (P"%25" * ZoneID)^-1) / attach_zoneid
-- Bracketed literal: either an IPv6 address (with optional zone) or IPvFuture.
_M.IP_literal = P"[" * (IPv6addrz + IPvFuture) * P"]"
-- Any IP-style host; the parsed value is converted with tostring.
local IP_host = (_M.IP_literal + IPv4address) / tostring
-- Lower-cases a decoded pct-encoded character, but leaves a value that is
-- still an escape (starts with "%") untouched.
local function lower_unless_escaped(s)
	if s:sub(1,1) == "%" then
		return s
	end
	return string.lower(s)
end
local reg_name_char = unreserved / string.lower
	+ _M.pct_encoded / lower_unless_escaped
	+ _M.sub_delims
-- reg-name: lower-cased text, or a nil capture when nothing matches
local reg_name = Cs(reg_name_char^1) + Cc(nil)
_M.host = IP_host + reg_name
-- port (3.2.3): zero or more digits, converted with tonumber
_M.port = (DIGIT^0) / tonumber
-- Path 3.3
local pchar = unreserved + _M.pct_encoded + _M.sub_delims + S":@"
-- segment: possibly empty run of pchar
local segment = pchar^0
_M.segment = Cs(segment)
-- segment-nz: at least one pchar
local segment_nz = pchar^1
-- segment-nz-nc: at least one pchar, none of them ":"
local segment_nz_nc = (pchar - P":")^1
-- "/" followed by a (possibly empty) segment
local slash_segment = P"/" * segment
-- An empty path produces nil rather than the empty string.
local path_empty = Cc(nil)
-- Note: because of the path_empty fallback this rule always succeeds.
local path_abempty = Cs(slash_segment^1) + path_empty
local path_rootless = Cs(segment_nz * slash_segment^0)
local path_noscheme = Cs(segment_nz_nc * slash_segment^0)
local path_absolute = Cs(P"/" * (segment_nz * slash_segment^0)^-1)
-- query (3.4): any run (possibly empty) of pchar, "/" or "?"
local query_char = pchar + S"/?"
_M.query = Cs(query_char^0)
-- fragment (3.5): the very same pattern as query
_M.fragment = _M.query
-- Composite rules; the pieces are exposed as named group captures
-- ("userinfo", "host", "port", "path") for an enclosing table capture.
local authority_userinfo = Cg(_M.userinfo, "userinfo") * P"@"
local authority_port = P":" * Cg(_M.port, "port")
_M.authority = authority_userinfo^-1 * Cg(_M.host, "host") * authority_port^-1
-- hier-part: "//" authority path-abempty, otherwise a path with no authority
local hier_with_authority = P"//" * _M.authority * Cg(path_abempty, "path")
local hier_path_only = Cg(path_absolute + path_rootless + path_empty, "path")
local hier_part = hier_with_authority + hier_path_only
do
	-- Shared pieces of the two table-producing rules below.
	local scheme_part = Cg(_M.scheme, "scheme") * P":"
	local optional_query = (P"?" * Cg(_M.query, "query"))^-1
	local optional_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
	-- absolute-URI: scheme, hier-part and an optional query (no fragment)
	_M.absolute_uri = Ct(scheme_part * hier_part * optional_query)
	-- URI: as absolute-URI, with an optional fragment on the end
	_M.uri = Ct(scheme_part * hier_part * optional_query * optional_fragment)
end
-- relative-part: same shape as hier-part, except that a path without an
-- authority uses path_noscheme in place of path_rootless.
local relative_with_authority = P"//" * _M.authority * Cg(path_abempty, "path")
local relative_path_only = Cg(path_absolute + path_noscheme + path_empty, "path")
_M.relative_part = relative_with_authority + relative_path_only
-- relative-ref: relative-part with optional query and fragment, as a table
local relative_query = (P"?" * Cg(_M.query, "query"))^-1
local relative_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
local relative_ref = Ct(_M.relative_part * relative_query * relative_fragment)
-- URI-reference: a full URI is tried first, then a relative reference
_M.uri_reference = _M.uri + relative_ref
-- path (RFC 3986 section 3.3):
--   path-abempty / path-absolute / path-noscheme / path-rootless / path-empty
-- LPeg's `+` is an ordered choice, and the local path_abempty falls back to
-- path_empty, which always succeeds. Listing path_abempty first therefore
-- made every later alternative unreachable: a relative path such as "a/b"
-- matched as the empty path and produced nil. The alternatives are ordered
-- here so that each one can actually be reached:
--   1. one or more "/" segment   (non-empty path-abempty; this also matches
--                                 everything path-absolute matches)
--   2. path_rootless             (a superset of path-noscheme)
--   3. path_empty                (nil capture, as before)
_M.path = Cs((P"/" * segment)^1) + path_rootless + path_empty
-- Create a slightly more sane host pattern
-- scheme is optional
-- the "//" isn't required
-- the host must contain at least one "." and end in at least two alpha characters
-- an authority is always required
local sane_host_char = unreserved / string.lower
-- one dot-free label
local hostsegment = (sane_host_char - P".")^1
-- One or more "label." groups followed by a final alphabetic label.
-- The final label is lower-cased as well, so that the whole host is
-- normalised consistently (previously "Example.COM" gave "example.COM").
local dns_entry = Cs ( ( hostsegment * P"." )^1 * (ALPHA^2 / string.lower) )
_M.sane_host = IP_host + dns_entry
-- Same layout as the regular authority, but built on sane_host.
local sane_userinfo = Cg(_M.userinfo, "userinfo") * P"@"
local sane_port = P":" * Cg(_M.port, "port")
_M.sane_authority = sane_userinfo^-1 * Cg(_M.sane_host, "host") * sane_port^-1
-- Optional "//", a mandatory authority, then an absolute or empty path.
local sane_path = Cg(path_absolute + path_empty, "path")
local sane_hier_part = (P"//")^-1 * _M.sane_authority * sane_path
do
	-- Every part except the hier-part is optional here, including the scheme.
	local optional_scheme = (Cg(_M.scheme, "scheme") * P":")^-1
	local optional_query = (P"?" * Cg(_M.query, "query"))^-1
	local optional_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
	_M.sane_uri = Ct(optional_scheme * sane_hier_part * optional_query * optional_fragment)
end
-- Hand the table of patterns to whoever required this module.
return _M