Skip to content

Commit 3ffdbef

Browse files
committed
manager for string constants
1 parent 9ef5cfe commit 3ffdbef

File tree

5 files changed

+114
-5
lines changed

5 files changed

+114
-5
lines changed

lttoolbox/Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \
33
deserialiser.h entry_token.h expander.h fst_processor.h input_file.h lt_locale.h \
44
match_exe.h match_node.h match_state.h my_stdio.h node.h \
5-
pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h \
5+
pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h string_writer.h \
66
transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \
77
ustring.h
88
cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \
99
expander.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \
1010
match_node.cc match_state.cc node.cc pattern_list.cc \
11-
regexp_compiler.cc sorted_vector.cc state.cc string_utils.cc transducer.cc \
11+
regexp_compiler.cc sorted_vector.cc state.cc string_utils.cc string_writer.cc transducer.cc \
1212
trans_exe.cc xml_parse_util.cc xml_walk_util.cc tmx_compiler.cc ustring.cc
1313

1414
library_includedir = $(includedir)/$(PACKAGE_NAME)-$(VERSION_API)/$(PACKAGE_NAME)

lttoolbox/compression.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,17 @@ using namespace std;
2929
// Global lttoolbox features
3030
constexpr char HEADER_LTTOOLBOX[4]{'L', 'T', 'T', 'B'};
3131
enum LT_FEATURES : uint64_t {
32-
LTF_UNKNOWN = (1ull << 0), // Features >= this are unknown, so throw an error; Inc this if more features are added
32+
LTF_MMAP = (1ull << 0), // using mmap-compatible format rather than compressed format
33+
LTF_UNKNOWN = (1ull << 1), // Features >= this are unknown, so throw an error; Inc this if more features are added
3334
LTF_RESERVED = (1ull << 63), // If we ever reach this many feature flags, we need a flag to know how to extend beyond 64 bits
3435
};
3536

3637
// Individual transducer features
3738
constexpr char HEADER_TRANSDUCER[4]{'L', 'T', 'T', 'D'};
3839
enum TD_FEATURES : uint64_t {
3940
TDF_WEIGHTS = (1ull << 0),
40-
TDF_UNKNOWN = (1ull << 1), // Features >= this are unknown, so throw an error; Inc this if more features are added
41+
TDF_MMAP = (1ull << 1),
42+
TDF_UNKNOWN = (1ull << 2), // Features >= this are unknown, so throw an error; Inc this if more features are added
4143
TDF_RESERVED = (1ull << 63), // If we ever reach this many feature flags, we need a flag to know how to extend beyond 64 bits
4244
};
4345

lttoolbox/string_writer.cc

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright (C) 2021 Apertium
3+
*
4+
* This program is free software; you can redistribute it and/or
5+
* modify it under the terms of the GNU General Public License as
6+
* published by the Free Software Foundation; either version 2 of the
7+
* License, or (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful, but
10+
* WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
* General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with this program; if not, see <https://www.gnu.org/licenses/>.
16+
*/
17+
18+
#include <lttoolbox/string_writer.h>
19+
20+
#include <stdexcept>
21+
22+
UString_view
23+
StringWriter::add(const UString& s)
24+
{
25+
auto start = buffer.find(s);
26+
if (start == UString::npos) {
27+
start = buffer.size();
28+
buffer += s;
29+
}
30+
UString_view ret(buffer);
31+
return ret.substr(start, s.size());
32+
}
33+
34+
/**
 * Return a view of a slice of the pool.
 *
 * @param start index of the first code unit in `buffer`
 * @param count maximum number of code units; the view is shortened if it
 *              would run past the end of `buffer`
 * @return view into `buffer`'s storage (not a copy)
 * @throws std::out_of_range if `start` is greater than `buffer.size()`
 */
UString_view
StringWriter::get(const uint32_t start, const uint32_t count)
{
  return UString_view(buffer).substr(start, count);
}
40+
41+
void
42+
StringWriter::read(FILE* in)
43+
{
44+
uint64_t len = read_u64_le(in);
45+
buffer.clear();
46+
buffer.reserve(len);
47+
uint8_t temp[len*2]{};
48+
if (fread_unlocked(&temp, 1, len*2, in) != len) {
49+
throw std::runtime_error("Failed to read strings");
50+
}
51+
uint16_t c;
52+
for (uint64_t i = 0; i < len*2; i += 2) {
53+
buffer += static_cast<UChar>(temp[i] | (temp[i+1] << 8));
54+
}
55+
}
56+
57+
void
58+
StringWriter::write(FILE* out)
59+
{
60+
write_u64_le(out, buffer.size());
61+
uint8_t temp[buffer.size()*2]{};
62+
for (uint64_t i = 0; i < buffer.size(); i++) {
63+
temp[2*i] = buffer[i] & 0xFF;
64+
temp[2*i+1] = (buffer[i] >> 8) & 0xFF;
65+
}
66+
if (fwrite_unlocked(&temp, 1, buffer.size()*2, out) != buffer.size()*2) {
67+
throw std::runtime_error("Failed to write strings");
68+
}
69+
}

lttoolbox/string_writer.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (C) 2021 Apertium
3+
*
4+
* This program is free software; you can redistribute it and/or
5+
* modify it under the terms of the GNU General Public License as
6+
* published by the Free Software Foundation; either version 2 of the
7+
* License, or (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful, but
10+
* WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
* General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with this program; if not, see <https://www.gnu.org/licenses/>.
16+
*/
17+
18+
#ifndef _LT_STRING_WRITER_
19+
#define _LT_STRING_WRITER_
20+
21+
// TODO: merge compression.h write_u64_le() and friends to here
22+
// when we drop compressed formats
23+
#include <lttoolbox/compression.h>
24+
#include <lttoolbox/ustring.h>
25+
#include <cstdint>
26+
27+
class StringWriter {
28+
public:
29+
UString buffer;
30+
UString_view add(const UString& s);
31+
UString_view get(const uint32_t start, const uint32_t count);
32+
void read(FILE* in);
33+
void write(FILE* out);
34+
};
35+
36+
#endif

lttoolbox/ustring.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
#include <vector>
2525
#include <cstdint>
2626
#include <iomanip>
27+
#include <string_view>
2728

2829
typedef std::basic_string<UChar> UString;
30+
typedef std::basic_string_view<UChar> UString_view;
2931

3032
void write(const UString& str, UFILE* output);
3133

@@ -43,7 +45,7 @@ operator<<(std::ostream& ostr, char16_t c)
4345
}
4446

4547
inline std::ostream&
46-
operator<<(std::ostream& ostr, const UString& str)
48+
operator<<(std::ostream& ostr, UString_view str)
4749
{
4850
std::string res;
4951
utf8::utf16to8(str.begin(), str.end(), std::back_inserter(res));

0 commit comments

Comments
 (0)