Skip to content
This repository
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 185 lines (168 sloc) 4.949 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <json/printbuf.h>
#include "parsley.h"
#include "xml2json.h"
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/xmlwriter.h>
#include <json/json.h>
#include <argp.h>
#include "util.h"

/*
 * Command-line state filled in by parse_opt() and consumed by main().
 */
struct arguments
{
  struct list_elem *include_files; /* head of the -i/--include file list */
  int flags;                       /* bitmask of PARSLEY_OPTIONS_* values */
  int output_xml;                  /* non-zero: emit XML instead of JSON (-X) */
  char *parsley;                   /* first positional argument: the parselet */
  char *user_agent;                /* User-Agent value for -U/--user-agent */
  char *input_file;                /* second positional argument: document to parse */
  char *output_file;               /* destination given with -o/--output */
};

/*
 * Node of the singly linked list of include files.
 * A node's `next` pointer is only followed when `has_next` is non-zero.
 */
struct list_elem {
  int has_next;           /* non-zero when `next` refers to another node */
  struct list_elem *next; /* following node; meaningful only if has_next is set */
  char *string;           /* file name carried by this node */
};

/* Global read by glibc argp to implement the automatic --version option. */
const char *argp_program_version = "parsley 0.1";
/* Global read by glibc argp; shown as the bug-report address in --help. */
const char *argp_program_bug_address = "<kyle@kylemaxwell.com>";
/* Description of the positional arguments; passed to argp via `argp` below. */
static char args_doc[] = "PARSELET FILE_TO_PARSE";
/* One-line program description; passed to argp via `argp` below. */
static char doc[] = "Parsley is a parselet parser.";

/*
 * Option table handed to argp.  Columns are: long name, short key,
 * argument name (NULL when the option takes no argument), option flags,
 * and help text.  The all-zero entry terminates the table.
 */
static struct argp_option options[] = {
  { "input-xml",     'x', NULL,         0, "Use the XML parser (not HTML)" },
  { "output-xml",    'X', NULL,         0, "Output XML (not JSON)" },
  { "output",        'o', "FILE",       0, "Output to FILE instead of standard output" },
  { "include",       'i', "FILE",       0, "Include the contents of FILE in the compiled XSLT" },
  { "no-prune",      'n', NULL,         0, "Don't prune empty subtrees" },
  { "no-collate",    'N', NULL,         0, "Don't collate array entries" },
  { "sg-wrap",       's', NULL,         0, "Wrap text nodes for SelectorGadget compatibility" },
  { "user-agent",    'U', "USER_AGENT", 0, "Value of HTTP User-Agent header" },
  { "utf8",          'u', NULL,         0, "Force input to be read as UTF-8" },
  { "no-net",        'z', NULL,         0, "Disable ftp and http access for parselets" },
  { "no-filesystem", 'Z', NULL,         0, "Disable filesystem access for parselets" },
  { 0 }
};

static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
  struct arguments *arguments = state->input;
struct list_elem *base = arguments->include_files;
struct list_elem *e;

  switch (key)
    {
    case 'x':
      arguments->flags &= ~PARSLEY_OPTIONS_HTML;
break;
    case 'u':
      arguments->flags |= PARSLEY_OPTIONS_FORCE_UTF8;
break;
case 'U':
      parsley_set_user_agent(arg);
    case 'n':
arguments->flags &= ~PARSLEY_OPTIONS_PRUNE;
break;
    case 'N':
arguments->flags &= ~PARSLEY_OPTIONS_COLLATE;
break;
    case 'z':
arguments->flags &= ~PARSLEY_OPTIONS_ALLOW_NET;
break;
    case 's':
arguments->flags |= PARSLEY_OPTIONS_SGWRAP;
break;
    case 'Z':
arguments->flags &= ~PARSLEY_OPTIONS_ALLOW_LOCAL;
break;
    case 'X':
arguments->output_xml = 1;
break;
    case 'i':
e = (struct list_elem *) calloc(1, sizeof(e));
e->string = arg;
while(base->has_next) base = base->next;
base->next = e;
base->has_next = 1;
      break;
    case 'o':
      arguments->output_file = arg;
      break;
    case ARGP_KEY_ARG:
switch(state->arg_num){
case 0:
arguments->parsley = arg;
break;
case 1:
arguments->input_file = arg;
break;
default:
argp_usage (state);
}
      break;
    case ARGP_KEY_END:
      if (state->arg_num < 2) argp_usage (state);
      break;
    default:
      return ARGP_ERR_UNKNOWN;
    }
  return 0;
}

/* Parser description passed to argp_parse() in main(). */
static struct argp argp = {
  .options  = options,
  .parser   = parse_opt,
  .args_doc = args_doc,
  .doc      = doc
};

int main (int argc, char **argv) {
struct arguments arguments;
struct list_elem elem;
struct list_elem *elemptr = &elem;
elem.has_next = 0;
arguments.output_xml = 0;
  arguments.flags = ~0 & ~PARSLEY_OPTIONS_SGWRAP & ~PARSLEY_OPTIONS_FORCE_UTF8;
arguments.include_files = elemptr;
arguments.output_file = "-";
argp_parse (&argp, argc, argv, 0, 0, &arguments);

  struct printbuf *buf = printbuf_new();
  struct printbuf *incl = printbuf_new();
  sprintbuf(buf, "");
  sprintbuf(incl, "");
  
  FILE * fd = parsley_fopen(arguments.parsley, "r");
  printbuf_file_read(fd, buf);
  fclose(fd);

while(elemptr->has_next) {
elemptr = elemptr->next;
FILE* f = parsley_fopen(elemptr->string, "r");
printbuf_file_read(f, incl);
fclose(f);
}

  // printf("a\n");
parsleyPtr compiled = parsley_compile(buf->buf, incl->buf);
  // printf("b\n");

if(compiled->error != NULL) {
fprintf(stderr, "%s\n", compiled->error);
exit(1);
}

parsedParsleyPtr ptr = parsley_parse_file(compiled, arguments.input_file, arguments.flags);

if(ptr->error != NULL) {
fprintf(stderr, "Parsing failed: %s\n", ptr->error);
exit(1);
}

if(arguments.output_xml) {
xmlSaveFormatFile(arguments.output_file, ptr->xml, 1);
} else {
struct json_object *json = xml2json(ptr->xml->children->children);
    if(json == NULL) {
      fprintf(stderr, "xml2json unknown error");
      exit(1);
    }
    char * json_string = json_object_to_json_string(json);
FILE* f = parsley_fopen(arguments.output_file, "w");
fprintf(f, "%s\n", json_string);
    json_object_put(json);
fclose(f);
}

  printbuf_free(buf);
  printbuf_free(incl);
parsley_free(compiled);
parsed_parsley_free(ptr);
return 0;
}
Something went wrong with that request. Please try again.