Skip to content

Commit

Permalink
Merge pull request #379 from cspiegel/bocfel-1.1
Browse files Browse the repository at this point in the history
Update Bocfel to 1.1.
  • Loading branch information
cspiegel committed Jun 21, 2020
2 parents 913bc98 + 50ae69f commit d9109c5
Show file tree
Hide file tree
Showing 24 changed files with 698 additions and 399 deletions.
2 changes: 1 addition & 1 deletion terps/Jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ if $(MAKE_ALAN3) = yes

# -----------------------------------------------------------------------
#
# Bocfel 1.0.1
# Bocfel 1.1
#

if $(MAKE_BOCFEL) = yes
Expand Down
93 changes: 55 additions & 38 deletions terps/bocfel/dict.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,34 @@
#include "util.h"
#include "zterp.h"

static uint16_t separators;
static uint8_t num_separators;
struct dictionary
{
uint16_t addr;
uint8_t num_separators;
uint16_t separators;
uint8_t entry_length;
long num_entries;
uint16_t base;
};

static struct dictionary new_dictionary(uint16_t addr)
{
uint8_t num_separators = user_byte(addr);

struct dictionary dictionary = (struct dictionary) {
.addr = addr,
.num_separators = num_separators,
.separators = addr + 1,
.entry_length = user_byte(addr + num_separators + 1),
.num_entries = as_signed(user_word(addr + num_separators + 2)),
.base = addr + 1 + num_separators + 1 + 2,
};

ZASSERT(dictionary.entry_length >= (zversion <= 3 ? 4 : 6), "dictionary entry length (%d) too small", dictionary.entry_length);
ZASSERT(dictionary.base + (labs(dictionary.num_entries) * dictionary.entry_length) < memory_size, "reported dictionary length extends beyond memory size");

return dictionary;
}

static uint16_t get_word(const uint8_t *base)
{
Expand Down Expand Up @@ -112,7 +138,7 @@ static void encode_string(const uint8_t *s, size_t len, uint8_t encoded[static 8
int shift = pos / 26;
int c = pos % 26;

if(shift) add_zchar(shiftbase + shift, n++, encoded);
if(shift > 0) add_zchar(shiftbase + shift, n++, encoded);
add_zchar(c + 6, n++, encoded);
}
else
Expand All @@ -138,32 +164,22 @@ static int dict_compar(const void *a, const void *b)
{
return memcmp(a, b, zversion <= 3 ? 4 : 6);
}
static uint16_t dict_find(const uint8_t *token, size_t len, uint16_t dictionary)
static uint16_t dict_find(const uint8_t *token, size_t len, const struct dictionary *dictionary)
{
uint8_t elength;
uint16_t base;
long nentries;
const uint8_t *ret = NULL;
uint8_t encoded[8];

encode_string(token, len, encoded);

elength = user_byte(dictionary + num_separators + 1);
nentries = as_signed(user_word(dictionary + num_separators + 2));
base = dictionary + num_separators + 2 + 2;

ZASSERT(elength >= (zversion <= 3 ? 4 : 6), "dictionary entry length (%d) too small", elength);
ZASSERT(base + (labs(nentries) * elength) < memory_size, "reported dictionary length extends beyond memory size");

if(nentries > 0)
if(dictionary->num_entries > 0)
{
ret = bsearch(encoded, &memory[base], nentries, elength, dict_compar);
ret = bsearch(encoded, &memory[dictionary->base], dictionary->num_entries, dictionary->entry_length, dict_compar);
}
else
{
for(long i = 0; i < -nentries; i++)
for(long i = 0; i < -dictionary->num_entries; i++)
{
const uint8_t *entry = &memory[base + (i * elength)];
const uint8_t *entry = &memory[dictionary->base + (i * dictionary->entry_length)];

if(dict_compar(encoded, entry) == 0)
{
Expand All @@ -175,19 +191,19 @@ static uint16_t dict_find(const uint8_t *token, size_t len, uint16_t dictionary)

if(ret == NULL) return 0;

return base + (ret - &memory[base]);
return dictionary->base + (ret - &memory[dictionary->base]);
}

static bool is_sep(uint8_t c)
static bool is_sep(uint8_t c, const struct dictionary *dictionary)
{
if(c == ZSCII_SPACE) return true;

for(uint16_t i = 0; i < num_separators; i++) if(user_byte(separators + i) == c) return true;
for(uint16_t i = 0; i < dictionary->num_separators; i++) if(byte(dictionary->separators + i) == c) return true;

return false;
}

static uint16_t lookup_replacement(uint16_t original, const uint8_t *replacement, size_t replen, uint16_t dictionary)
static uint16_t lookup_replacement(uint16_t original, const uint8_t *replacement, size_t replen, const struct dictionary *dictionary)
{
uint16_t d;

Expand All @@ -198,18 +214,17 @@ static uint16_t lookup_replacement(uint16_t original, const uint8_t *replacement
return d;
}

static void handle_token(const uint8_t *base, const uint8_t *token, size_t len, uint16_t parse, uint16_t dictionary, int found, bool flag, bool start_of_sentence)
static void handle_token(const uint8_t *base, const uint8_t *token, size_t len, uint16_t parse, const struct dictionary *dictionary, int found, bool flag, bool start_of_sentence)
{
uint16_t d;

d = dict_find(token, len, dictionary);

if(
zversion < 5 &&
len == 1 &&
is_infocom_v1234 &&
start_of_sentence &&
!options.disable_abbreviations &&
len == 1)
!options.disable_abbreviations)
{
const uint8_t examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e' };
const uint8_t again[] = { 'a', 'g', 'a', 'i', 'n' };
Expand Down Expand Up @@ -253,7 +268,7 @@ static void handle_token(const uint8_t *base, const uint8_t *token, size_t len,
* • The next byte is the length of the token.
* • The final byte is the offset in the string of the token.
*/
void tokenize(uint16_t text, uint16_t parse, uint16_t dictionary, bool flag)
void tokenize(uint16_t text, uint16_t parse, uint16_t dictaddr, bool flag)
{
const uint8_t *p, *lastp;
const uint8_t *string;
Expand All @@ -262,13 +277,13 @@ void tokenize(uint16_t text, uint16_t parse, uint16_t dictionary, bool flag)
bool in_word = false;
int found = 0;
bool start_of_sentence = true;
struct dictionary dictionary;

if(dictionary == 0) dictionary = header.dictionary;
if(dictaddr == 0) dictaddr = header.dictionary;

ZASSERT(dictionary != 0, "attempt to tokenize without a valid dictionary");
ZASSERT(dictaddr != 0, "attempt to tokenize without a valid dictionary");

num_separators = user_byte(dictionary);
separators = dictionary + 1;
dictionary = new_dictionary(dictaddr);

if(zversion >= 5) text_len = user_byte(text + 1);
else while(user_byte(text + 1 + text_len) != 0) text_len++;
Expand All @@ -277,31 +292,33 @@ void tokenize(uint16_t text, uint16_t parse, uint16_t dictionary, bool flag)

string = &memory[text + 1 + (zversion >= 5)];

for(p = string; p - string < text_len && *p == ZSCII_SPACE; p++);
for(p = string; p - string < text_len && *p == ZSCII_SPACE; p++)
{
}
lastp = p;

text_len -= (p - string);

do
{
if(!in_word && text_len != 0 && !is_sep(*p))
if(!in_word && text_len != 0 && !is_sep(*p, &dictionary))
{
in_word = true;
lastp = p;
}

if(text_len == 0 || is_sep(*p))
if(text_len == 0 || is_sep(*p, &dictionary))
{
if(in_word)
{
handle_token(string, lastp, p - lastp, parse, dictionary, found++, flag, start_of_sentence);
handle_token(string, lastp, p - lastp, parse, &dictionary, found++, flag, start_of_sentence);
start_of_sentence = false;
}

/* §13.6.1: Separators (apart from a space) are tokens too. */
if(text_len != 0 && *p != ZSCII_SPACE)
{
handle_token(string, p, 1, parse, dictionary, found++, flag, start_of_sentence);
handle_token(string, p, 1, parse, &dictionary, found++, flag, start_of_sentence);

start_of_sentence = *p == '.';
}
Expand All @@ -313,7 +330,7 @@ void tokenize(uint16_t text, uint16_t parse, uint16_t dictionary, bool flag)

p++;

} while(text_len--);
} while(text_len-- > 0);

user_store_byte(parse + 1, found);
}
Expand All @@ -334,7 +351,7 @@ void ztokenise(void)
if(znargs < 3) zargs[2] = 0;
if(znargs < 4) zargs[3] = 0;

tokenize(zargs[0], zargs[1], zargs[2], zargs[3]);
tokenize(zargs[0], zargs[1], zargs[2], zargs[3] != 0);
}

void zencode_text(void)
Expand Down
Loading

0 comments on commit d9109c5

Please sign in to comment.