-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
parse_primitives.cc
91 lines (77 loc) · 2.17 KB
/
parse_primitives.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
*/
#include <iostream>
#ifndef WIN32
#include <strings.h>
#else
#include <string>
#endif
#include <cstring>
#include <stdexcept>
#include <sstream>
#include "parse_primitives.h"
#include "hash.h"
#include "vw_exception.h"
// Returns true when the byte ranges [a.begin, a.end) and [b.begin, b.end)
// have the same length and identical contents.
//
// The comparison is done with memcmp over the exact length rather than
// strncmp: substrings are delimited by pointers, not by a terminator, and
// strncmp would stop at the first '\0' and report two ranges as equal even
// if they differ after an embedded NUL byte.
//
// Both arguments are only read; they are taken by non-const reference to
// keep the existing signature.
bool substring_equal(substring&a, substring&b)
{ const size_t len_a = a.end - a.begin;
  const size_t len_b = b.end - b.begin;
  if (len_a != len_b)
    return false;
  // Two empty ranges are equal; return before touching either pointer.
  if (len_a == 0)
    return true;
  return memcmp(a.begin, b.begin, len_a) == 0;
}
void tokenize(char delim, substring s, v_array<substring>& ret, bool allow_empty)
{ ret.erase();
char *last = s.begin;
for (; s.begin != s.end; s.begin++)
{ if (*s.begin == delim)
{ if (allow_empty || (s.begin != last))
{ substring temp = {last, s.begin};
ret.push_back(temp);
}
last = s.begin+1;
}
}
if (allow_empty || (s.begin != last))
{ substring final = {last, s.begin};
ret.push_back(final);
}
}
// Hashes a substring, treating purely numeric names specially.
//
// Leading and trailing bytes in the range 0x00..0x20 are trimmed first.
// Bytes with the high bit set (such as those of UTF-8 multi-byte sequences)
// are never trimmed, whether plain char is signed or unsigned.
//
// After trimming:
//   - if every remaining character is a decimal digit (this includes the
//     empty string), the digits are read as a base-10 number and the result
//     is that number plus `h`;
//   - otherwise the trimmed bytes are passed to uniform_hash together with
//     `h`, and its result is returned.
uint64_t hashstring (substring s, uint64_t h)
{ // Drop leading whitespace/control bytes, leaving high-bit bytes alone.
  while (s.begin < s.end && *s.begin <= 0x20 && (int)*s.begin >= 0)
    ++s.begin;
  // Drop trailing whitespace/control bytes, leaving high-bit bytes alone.
  while (s.end > s.begin && *(s.end - 1) <= 0x20 && (int)*(s.end - 1) >= 0)
    --s.end;

  size_t numeric = 0;
  for (char* cur = s.begin; cur != s.end; ++cur)
  { const char c = *cur;
    // The first non-digit means this is not a plain number: hash the bytes.
    if (c < '0' || c > '9')
      return uniform_hash((unsigned char *)s.begin, s.end - s.begin, h);
    numeric = 10*numeric + c - '0';
  }
  return numeric + h;
}
// Hashes every byte of [s.begin, s.end) by passing the range and `h`
// straight to uniform_hash. Unlike hashstring, nothing is trimmed and
// numeric strings get no special treatment.
uint64_t hashall (substring s, uint64_t h)
{ unsigned char* bytes = (unsigned char *)s.begin;
  return uniform_hash(bytes, s.end - s.begin, h);
}
// Maps a hash-mode name to the corresponding hashing function:
//   "strings" -> hashstring
//   "all"     -> hashall
// Any other name is reported through the THROW macro.
hash_func_t getHasher(const std::string& s)
{ if (s == "strings")
    return hashstring;
  if (s == "all")
    return hashall;

  THROW("Unknown hash function: " << s);
}
// Writes the characters in [ss.begin, ss.end) to `os` and returns `os`.
// The range is copied into a temporary std::string so that it goes through
// the stream's normal string formatting.
std::ostream& operator<<(std::ostream& os, const substring& ss)
{ const size_t length = ss.end - ss.begin;
  os << std::string(ss.begin, length);
  return os;
}
std::ostream& operator<<(std::ostream& os, const v_array<substring>& ss)
{ substring* it = ss.cbegin();
if (it == ss.cend())
{ return os;
}
os << *it;
for (it++; it != ss.cend(); it++)
{ os << ",";
os << *it;
}
return os;
}