Skip to content

Commit

Permalink
add uri_parser
Browse files Browse the repository at this point in the history
  • Loading branch information
i-rinat committed Sep 27, 2014
1 parent 88c175e commit 65536af
Show file tree
Hide file tree
Showing 8 changed files with 629 additions and 0 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3")
# request pthreads: append -pthread to the C flags shared by all build types
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")

# find Ragel; it is invoked at build time to generate uri_parser_fsm.c
find_program(RAGEL ragel)
# if(NOT <var>) is true for an empty value and for any "*-NOTFOUND" value, so
# it covers both a failed lookup and a user-supplied empty -DRAGEL= without
# expanding the variable unquoted inside if().
if(NOT RAGEL)
    message(FATAL_ERROR "Ragel State Machine Compiler not found")
endif()

# dependencies; pkg-config is mandatory because the modules below are
# looked up through pkg_check_modules()
find_package(PkgConfig REQUIRED)
pkg_check_modules(REQ
Expand Down
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ endif()

# put built libraries directly into the top-level build directory
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

# uri_parser defines the uri-parser-obj object library referenced below
add_subdirectory(uri_parser)

add_library(freshwrapper-obj OBJECT
async_network.c
config.c
Expand Down Expand Up @@ -71,6 +73,7 @@ add_library(freshwrapper-obj OBJECT
# Shared library assembled from the objects of the freshwrapper, parson and
# uri-parser object libraries plus the PepperFlash-specific configuration source.
add_library(freshwrapper-pepperflash SHARED
    $<TARGET_OBJECTS:freshwrapper-obj>
    $<TARGET_OBJECTS:parson-obj>
    $<TARGET_OBJECTS:uri-parser-obj>
    config_pepperflash.c
)

Expand Down
14 changes: 14 additions & 0 deletions src/uri_parser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build rules for the URI parser.
#
# uri_parser_fsm.c is generated by Ragel from uri_parser_fsm.rl into the
# binary directory. uri_parser.c pulls it in with #include, so the binary
# directory is put on the include path and the object library is ordered
# after the generation step.
set(URI_PARSER_FSM_RL "${CMAKE_CURRENT_SOURCE_DIR}/uri_parser_fsm.rl")
set(URI_PARSER_FSM_C "${CMAKE_CURRENT_BINARY_DIR}/uri_parser_fsm.c")

# Directory-scoped on purpose: only uri-parser-obj is defined in this directory.
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

add_custom_command(
    OUTPUT "${URI_PARSER_FSM_C}"
    COMMAND "${RAGEL}" -G2 "${URI_PARSER_FSM_RL}" -o "${URI_PARSER_FSM_C}"
    DEPENDS "${URI_PARSER_FSM_RL}"
    COMMENT "Generating uri_parser_fsm.c with Ragel"
    VERBATIM
)

# The generated file is not a source of any target (it is only #included),
# so a custom target is needed to drive the rule above.
add_custom_target(generate_fsm DEPENDS "${URI_PARSER_FSM_C}")

add_library(uri-parser-obj OBJECT
    uri_parser.c
)
add_dependencies(uri-parser-obj generate_fsm)
309 changes: 309 additions & 0 deletions src/uri_parser/uri_parser.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
/*
* Copyright © 2013-2014 Rinat Ibragimov
*
* This file is part of FreshPlayerPlugin.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#include "uri_parser.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <glib.h>


// Length-delimited string view used inside this file. `data` is not
// guaranteed to be NUL-terminated at `len`; consumers print it with "%.*s".
typedef struct {
int len;        // number of bytes that belong to the string
char *data;     // first byte; may point into a caller-owned URI string
} str_t;


// Describe the character range [b, e) as a component of the string that
// starts at s: the component begins at offset (b - s) and spans (e - b) bytes.
static void
set_url_component(struct PP_URLComponent_Dev *c, const char *s, const char *b, const char *e)
{
    c->len = e - b;
    c->begin = b - s;
}

// Put a component into the "not present" state: offset 0 with length -1.
static void
reset_url_component(struct PP_URLComponent_Dev *c)
{
    c->len = -1;
    c->begin = 0;
}

// Put every component of a parsed-URL record into the "not present" state.
static void
reset_url_components(struct PP_URLComponents_Dev *c)
{
    struct PP_URLComponent_Dev *const parts[] = {
        &c->scheme,
        &c->username,
        &c->password,
        &c->host,
        &c->port,
        &c->path,
        &c->query,
        &c->ref,
    };

    for (size_t k = 0; k < sizeof(parts) / sizeof(parts[0]); k++)
        reset_url_component(parts[k]);
}

// Parse the NUL-terminated URI `s` and describe its parts in `components`.
//
// `components` is first reset so that every part is "not present"
// (begin 0, len -1). The actual parsing is done by the code textually
// included from uri_parser_fsm.c, which the build generates from
// uri_parser_fsm.rl with Ragel. That code is not visible here; it is
// presumably what fills in `components` — confirm against the .rl file.
//
// `s` must not be NULL: its length is taken with strlen().
void
uri_parser_parse_uri(const char *s, struct PP_URLComponents_Dev *components)
{
// Input cursor, end of input and end-of-file marker. Presumably these, and
// `cs` below, are the names the generated state machine expects — confirm.
const char *p = s;
const char *pe = s + strlen(s);
const char *eof = pe;
// Start positions of the parts; not referenced in the visible code, so
// presumably consumed by the included state machine.
struct {
const char *scheme;
const char *username;
const char *password;
const char *host;
const char *port;
const char *path;
const char *query;
const char *fragment;
} mark;
// Also only referenced (if at all) by the included code.
const char *end_username, *end_password;
int cs;
// Scratch component set, reset below; its use is not visible here.
struct PP_URLComponents_Dev c2;

reset_url_components(components);
reset_url_components(&c2);

// all marks start out as NULL
memset(&mark, 0, sizeof(mark));

// generated parser body; the name resolves through the include path the
// build adds for the binary directory
#include <uri_parser_fsm.c>

// silence unused-variable warnings
(void)uri_parser_en_main;
(void)uri_parser_error;
(void)uri_parser_first_final;

}

// Returns 1 when the component is in the "not present" state
// (begin == 0 and len == -1), 0 otherwise.
static inline int
urlcomponent_is_empty(const struct PP_URLComponent_Dev *c)
{
    if (c->begin != 0)
        return 0;

    return c->len == -1;
}

// Logical negation of urlcomponent_is_empty(): 1 when the component is
// present, 0 when it is in the "not present" state.
static inline int
urlcomponent_is_not_empty(const struct PP_URLComponent_Dev *c)
{
    return urlcomponent_is_empty(c) == 0;
}

// Returns 1 when at least one authority part (username, password, host or
// port) is present, 0 when none of them is.
static inline int
authority_is_not_empty(const struct PP_URLComponents_Dev *c)
{
    if (urlcomponent_is_not_empty(&c->username))
        return 1;
    if (urlcomponent_is_not_empty(&c->password))
        return 1;
    if (urlcomponent_is_not_empty(&c->host))
        return 1;

    return urlcomponent_is_not_empty(&c->port);
}

static str_t
extract_component(const char *s, const struct PP_URLComponent_Dev *c)
{
str_t res;
res.data = (char *)s + c->begin;
res.len = (c->len >= 0) ? c->len : 0;
return res;
}

static str_t
extract_authority(const char *s, struct PP_URLComponents_Dev *c)
{
int32_t begin = 0;
if (c->port.len >= 0) begin = c->port.begin;
if (c->host.len >= 0) begin = c->host.begin;
if (c->password.len >= 0) begin = c->password.begin;
if (c->username.len >= 0) begin = c->username.begin;

int32_t end = -1;
if (c->username.len >= 0) end = c->username.begin + c->username.len;
if (c->password.len >= 0) end = c->password.begin + c->password.len;
if (c->host.len >= 0) end = c->host.begin + c->host.len;
if (c->port.len >= 0) end = c->port.begin + c->port.len;

struct PP_URLComponent_Dev cc = { .begin = begin, .len = end - begin };
return extract_component(s, &cc);
}

// Combine the base path with a relative path (see RFC 3986, 5.2.3).
//
// When the base has a non-empty host and no path, the result is "/" followed
// by the relative path. Otherwise the base path is cut after its last '/'
// (or dropped completely when it has none) and the relative path is appended.
//
// The result lives in a freshly allocated buffer which is prepended to *m so
// that the caller can release it later.
static str_t
merge_path(const char *base_uri, const struct PP_URLComponents_Dev *base_c, const char *rel_uri,
           const struct PP_URLComponents_Dev *rel_c, GList **m)
{
    const str_t rel_path = extract_component(rel_uri, &rel_c->path);
    gchar *merged;

    if (base_c->host.len > 0 && base_c->path.len <= 0) {
        merged = g_strdup_printf("/%.*s", rel_path.len, rel_path.data);
    } else {
        const str_t base_path = extract_component(base_uri, &base_c->path);
        int keep = base_path.len;

        // shrink the base path until it ends with '/' or nothing is left
        for (; keep > 0; keep--) {
            if (base_path.data[keep - 1] == '/')
                break;
        }

        merged = g_strdup_printf("%.*s%.*s", keep, base_path.data, rel_path.len, rel_path.data);
    }

    *m = g_list_prepend(*m, merged);

    str_t res;
    res.data = merged;
    res.len = strlen(merged);
    return res;
}

// Collapse "." and ".." path segments of `s`.
//
// The input is copied into a new buffer (registered in *m so the caller can
// free it) and rewritten in a single pass: `psrc` reads from the original,
// `pdst` is the write position in the copy. The returned view points into
// that copy and is length-delimited only: no terminator is written at
// res.len.
//
// NOTE(review): when there is no earlier segment to drop (for example the
// input "../a"), the loop still emits the '/', giving "/a" — confirm this is
// the intended result.
static str_t
remove_dot_segments(str_t s, GList **m)
{
// make a working copy
str_t res;
res.data = g_strndup(s.data, s.len);
res.len = strlen(res.data);     // provisional; replaced by pdst before returning
*m = g_list_prepend(*m, res.data);

int pdst = 0;           // next write position in res.data
int dot_cnt = 0;        // dots seen in the current segment while it consists of dots only
int after_slash = 1;    // 1 until a character other than '.' or '/' is seen in the segment
for (int psrc = 0; psrc < s.len; psrc ++) {
char c = s.data[psrc];
switch (c) {
case '/':
// the segment that just ended was exactly "." or "..": rewind the
// output once for "." (drops the dots and the '/' before them) and
// twice for ".." (additionally drops the preceding segment)
if (dot_cnt == 1 || dot_cnt == 2) {
for (int k = 0; k < dot_cnt; k ++) {
while (pdst > 0 && res.data[pdst - 1] != '/')
pdst --;
if (pdst > 0)
pdst --;
}
}
res.data[pdst++] = '/';
dot_cnt = 0;
after_slash = 1;
break;
case '.':
// dots are copied right away; they are rewound later if the
// segment turns out to be "." or ".."
res.data[pdst++] = '.';
if (after_slash)
dot_cnt ++;
break;
default:
// any other character makes this an ordinary segment
dot_cnt = 0;
after_slash = 0;
res.data[pdst++] = c;
break;
}
}

// treat trailing "/.." and "/."
if (after_slash && (dot_cnt == 1 || dot_cnt == 2)) {
for (int k = 0; k < dot_cnt; k ++) {
while (pdst > 0 && res.data[pdst - 1] != '/')
pdst --;
if (pdst > 0)
pdst --;
}
res.data[pdst++] = '/';
}

res.len = pdst;
return res;
}

gchar *
uri_parser_merge_uris(const char *base_uri, const char *rel_uri)
{
struct PP_URLComponents_Dev base_c, rel_c;
str_t scheme = {};
str_t authority = {};
str_t path = {};
str_t query = {};
str_t fragment = {};
GList *m = NULL; // list of allocated memory blocks

uri_parser_parse_uri(base_uri, &base_c);
uri_parser_parse_uri(rel_uri, &rel_c);

// See RFC 3986, 5.2. Relative Resolution
if (urlcomponent_is_not_empty(&rel_c.scheme)) {
scheme = extract_component(rel_uri, &rel_c.scheme);
authority = extract_authority(rel_uri, &rel_c);
path = extract_component(rel_uri, &rel_c.path);
path = remove_dot_segments(path, &m);
query = extract_component(rel_uri, &rel_c.query);
} else {
if (authority_is_not_empty(&rel_c)) {
authority = extract_authority(rel_uri, &rel_c);
path = extract_component(rel_uri, &rel_c.path);
path = remove_dot_segments(path, &m);
query = extract_component(rel_uri, &rel_c.query);
} else {
if (rel_c.path.len <= 0) {
path = extract_component(base_uri, &base_c.path);
if (urlcomponent_is_not_empty(&rel_c.query)) {
query = extract_component(rel_uri, &rel_c.query);
} else {
query = extract_component(base_uri, &base_c.query);
}
} else {
if (rel_c.path.len > 0 && rel_uri[rel_c.path.begin] == '/') {
path = extract_component(rel_uri, &rel_c.path);
path = remove_dot_segments(path, &m);
} else {
path = merge_path(base_uri, &base_c, rel_uri, &rel_c, &m);
path = remove_dot_segments(path, &m);
}
query = extract_component(rel_uri, &rel_c.query);
}
authority = extract_authority(base_uri, &base_c);
}
scheme = extract_component(base_uri, &base_c.scheme);
}

fragment = extract_component(rel_uri, &rel_c.ref);

gchar *res = g_strdup_printf(
"%.*s" // scheme
"%s" // ":" if scheme is defined
"%s" // "//" if authority is defined
"%.*s" // authority
"%.*s" // path
"%s" // "?" if query is defined
"%.*s" // query
"%s" // "#" if fragment is defined
"%.*s", // fragment
scheme.len, scheme.data,
scheme.len > 0 ? ":" : "",
authority.len > 0 ? "//" : "",
authority.len, authority.data,
path.len, path.data,
query.len > 0 ? "?" : "",
query.len, query.data,
fragment.len > 0 ? "#" : "",
fragment.len, fragment.data);

// free temporary strings
while (m) {
g_free(m->data);
m = g_list_next(m);
}

return res;
}
37 changes: 37 additions & 0 deletions src/uri_parser/uri_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright © 2013-2014 Rinat Ibragimov
*
* This file is part of FreshPlayerPlugin.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef FPP__URI_PARSER_H
#define FPP__URI_PARSER_H

#include <ppapi/c/dev/ppb_url_util_dev.h>
#include <glib.h>

// Parse the NUL-terminated URI `s` and describe its parts in `components`
// as offset/length pairs relative to `s`. Every part is first reset to
// begin 0 / len -1; parts the parser recognises are then expected to be
// filled in. `s` must not be NULL.
void
uri_parser_parse_uri(const char *s, struct PP_URLComponents_Dev *components);

// Resolve `rel_uri` against `base_uri` (RFC 3986, 5.2 "Relative Resolution").
// Returns a newly allocated string that the caller releases with g_free().
gchar *
uri_parser_merge_uris(const char *base_uri, const char *rel_uri);

#endif // FPP__URI_PARSER_H

0 comments on commit 65536af

Please sign in to comment.