/
parser.c
97 lines (96 loc) · 2.76 KB
/
parser.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
* parser for input
*/
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <stdbool.h>
#include "markov.h"
/**
 * Reads up to two whitespace-separated words from a wide-character string
 * into a varstr.
 *
 * Scanning stops at the second whitespace character, at a newline, or at the
 * terminating L'\0'. The first whitespace character (the separator between
 * the two words) is copied; characters that are neither whitespace,
 * punctuation nor alphanumeric are dropped. A L'\0' is appended to `into`
 * once scanning stops.
 *
 * Returns the number of characters consumed from `from`, counting the
 * character that stopped the scan. The count is negated when the scan
 * stopped at the end of the string. Returns -1 if appending to `into` fails;
 * note that callers testing only for a negative value will see this as
 * "end of string".
 */
static signed int r2w(struct varstr *into, wchar_t *from) {
    int spaces = 0;
    int i = 0;
    /* Seed value: a control character that the filter below is expected to
     * skip, so the first real character is fetched by the loop's step. */
    wchar_t c = L'\2';

    for (; c != L'\0'; c = from[i++]) {
        const int is_space = (iswspace(c) != 0);

        /* The second whitespace character ends the two-word chunk. */
        if (is_space && ++spaces == 2) {
            break;
        }
        /* A newline always ends the chunk, even after a single word. */
        if (c == L'\n') {
            break;
        }
        /* Drop anything that is not whitespace, punctuation or alphanumeric. */
        if (!is_space && iswpunct(c) == 0 && iswalnum(c) == 0) {
            continue;
        }
        if (varstr_pushc(into, c) == NULL) {
            perror("r2w(): varstr_pushc()");
            return -1;
        }
    }

    /* The terminator append can fail just like any other append. */
    if (varstr_pushc(into, L'\0') == NULL) {
        perror("r2w(): varstr_pushc()");
        return -1;
    }

    /* i is always >= 1 here, so the negated form is strictly negative. */
    return (c == L'\0') ? -i : i;
}
/**
 * Splits text into consecutive chunks with r2w() and records each adjacent
 * pair of chunks through store_kv(): the earlier chunk is passed as the
 * first argument and the chunk that follows it as the second.
 *
 * is_sentence is forwarded to store_kv() for the first pair only; every
 * later pair is stored with false.
 *
 * Returns false if a varstr cannot be created or packed, true once r2w()
 * reports a negative result.
 */
extern bool read_data(wchar_t *text, bool is_sentence) {
    wchar_t *prev = NULL;
    bool sentence_start = is_sentence;

    for (;;) {
        struct varstr *chunk = varstr_init();
        if (chunk == NULL) {
            perror("init varstr in read_data()");
            return false;
        }

        const signed int advance = r2w(chunk, text);

        /* Each iteration packs its chunk exactly once, whether or not a
         * previous chunk exists to pair it with. */
        wchar_t *packed = varstr_pack(chunk);
        if (packed == NULL) {
            perror("packing varstr in read_data()");
            return false;
        }

        if (prev != NULL) {
            store_kv(prev, packed, sentence_start);
            sentence_start = false;
        }
        prev = packed;

        /* A negative result from r2w() means there is nothing more to read. */
        if (advance < 0) {
            return true;
        }
        text += advance;
    }
}
/**
 * Read one line of input from file pointer fp (pre-opened) and pass it to
 * read_data().
 *
 * Characters are collected up to, but not including, the next newline or
 * the end of the file. A final line without a trailing newline is still
 * processed before EOF is reported.
 *
 * is_sentence == true if the line is a complete sentence; it is forwarded
 * to read_data() unchanged.
 *
 * Returns 1 on EOF, 2 on error, and 0 otherwise.
 */
extern int read_input(FILE *fp, bool is_sentence) {
    struct varstr *buf = varstr_init();
    if (buf == NULL) {
        perror("init varstr in read_input()");
        return 2;
    }

    /* NOTE(review): buf is not released on the error returns below; no
     * varstr destructor is visible from this file - confirm and add one. */

    /* fgetwc() returns wint_t. Keeping that type makes the WEOF comparison
     * well-defined regardless of how wchar_t is represented. */
    wint_t c;
    while ((c = fgetwc(fp)) != WEOF && c != (wint_t)L'\n') {
        if (varstr_pushc(buf, (wchar_t)c) == NULL) {
            /* Do not carry on with a truncated line: the remainder would be
             * picked up by the next call as if it were a separate line. */
            perror("varstr_pushc() in read_input()");
            return 2;
        }
    }
    if (ferror(fp)) {
        perror("reading file in read_input()");
        return 2;
    }

    wchar_t *str = varstr_pack(buf);
    if (str == NULL) {
        perror("packing varstr in read_input()");
        return 2;
    }

    /* Release the line on both outcomes; read_data() works on its own
     * packed copies of the text. */
    const bool ok = read_data(str, is_sentence);
    free(str);
    if (!ok) {
        return 2;
    }

    return feof(fp) ? 1 : 0;
}