Skip to content
This repository was archived by the owner on May 12, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions core/Lucy/Analysis/Analyzer.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public abstract class Lucy::Analysis::Analyzer inherits Clownfish::Obj {
/** Take a single [](cfish:Inversion) as input
* and return an Inversion, either the same one (presumably transformed
* in some way), or a new one.
*
* @param inversion An inversion.
*/
public abstract incremented Inversion*
Transform(Analyzer *self, Inversion *inversion);
Expand All @@ -40,18 +42,41 @@ public abstract class Lucy::Analysis::Analyzer inherits Clownfish::Obj {
* The default implementation simply creates an initial Inversion with a
* single Token, then calls [](cfish:.Transform), but occasionally subclasses will
* provide an optimized implementation which minimizes string copies.
*
* @param text A string.
*/
public incremented Inversion*
Transform_Text(Analyzer *self, String *text);

/** Analyze text and return an array of token texts.
*
* @param text A string.
*/
public incremented Vector*
Split(Analyzer *self, String *text);

/** Dump the analyzer as a hash.
*
* Subclasses should call [](.Dump) on the superclass. The returned
* object is a hash which should be populated with parameters of
* the analyzer.
*
* @return A hash containing a description of the analyzer.
*/
public incremented Obj*
Dump(Analyzer *self);

/** Reconstruct an analyzer from a dump.
*
* Subclasses should first call [](.Load) on the superclass. The
* returned object is an analyzer which should be reconstructed by
* setting the dumped parameters from the hash contained in `dump`.
*
* Note that the invocant analyzer is unused.
*
* @param dump A hash.
* @return An analyzer.
*/
public incremented Obj*
Load(Analyzer *self, Obj *dump);
}
Expand Down
11 changes: 6 additions & 5 deletions core/Lucy/Analysis/Inversion.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,28 @@ public class Lucy::Analysis::Inversion inherits Clownfish::Obj {
uint32_t cluster_counts_size; /* num unique texts */

/**
* @param seed An initial Token to start things off, which may be NULL.
* @param seed An initial Token to start things off, which may be
* [](@null).
*/
inert incremented Inversion*
public inert incremented Inversion*
new(Token *seed = NULL);

/** Tack a token onto the end of the Inversion.
*
* @param token A Token.
*/
void
public void
Append(Inversion *self, decremented Token *token);

/** Return the next token in the Inversion until out of tokens.
*/
nullable Token*
public nullable Token*
Next(Inversion *self);

/** Reset the Inversion's iterator, so that the next call to next()
* returns the first Token in the inversion.
*/
void
public void
Reset(Inversion *self);

/** Assign positions to constituent Tokens, tallying up the position
Expand Down
33 changes: 21 additions & 12 deletions core/Lucy/Analysis/Token.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ parcel Lucy;
* `boost` is a per-token weight. Use this when you want to assign
* more or less importance to a particular token, as you might for emboldened
* text within an HTML document, for example. (Note: The field this token
* belongs to must be spec'd to use a posting of type
* [](cfish:RichPosting).)
* belongs to must be spec'd to use a posting of type RichPosting.)
*
* `pos_inc` is the POSition INCrement, measured in Tokens. This
* attribute, which defaults to 1, is an advanced tool for manipulating
Expand All @@ -49,7 +48,7 @@ parcel Lucy;
* will end up assigned to positions 0, 1, and 1001 -- and will no longer
* produce a phrase match for the query `"three blind mice"`.
*/
class Lucy::Analysis::Token inherits Clownfish::Obj {
public class Lucy::Analysis::Token inherits Clownfish::Obj {

char *text;
size_t len;
Expand All @@ -59,11 +58,21 @@ class Lucy::Analysis::Token inherits Clownfish::Obj {
int32_t pos_inc;
int32_t pos;

inert incremented Token*
public inert incremented Token*
new(const char *text, size_t len, uint32_t start_offset,
uint32_t end_offset, float boost = 1.0, int32_t pos_inc = 1);

inert Token*
/**
* @param text A UTF-8 string.
* @param len Size of the string in bytes.
* @param start_offset Start offset into the original document in Unicode
* code points.
* @param end_offset End offset into the original document in Unicode
* code points.
* @param boost Per-token weight.
* @param pos_inc Position increment for phrase matching.
*/
public inert Token*
init(Token *self, const char *text, size_t len,
uint32_t start_offset, uint32_t end_offset,
float boost = 1.0, int32_t pos_inc = 1);
Expand All @@ -73,16 +82,16 @@ class Lucy::Analysis::Token inherits Clownfish::Obj {
inert int
compare(const void *va, const void *vb);

uint32_t
public uint32_t
Get_Start_Offset(Token *self);

uint32_t
public uint32_t
Get_End_Offset(Token *self);

float
public float
Get_Boost(Token *self);

int32_t
public int32_t
Get_Pos_Inc(Token *self);

/** Accessor for pos. Only valid after position increments for an array
Expand All @@ -91,13 +100,13 @@ class Lucy::Analysis::Token inherits Clownfish::Obj {
int32_t
Get_Pos(Token *self);

char*
public char*
Get_Text(Token *self);

size_t
public size_t
Get_Len(Token *self);

void
public void
Set_Text(Token *self, char *text, size_t len);

public void
Expand Down
4 changes: 2 additions & 2 deletions core/Lucy/Docs/IRTheory.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ formal introduction:
Since Lucy is a practical implementation of IR theory, it loads these
abstract, distilled definitions down with useful traits. For instance, a
"posting" in its most rarefied form is simply a term-document pairing; in
Lucy, the class [](cfish:lucy.MatchPosting) fills this
Lucy, the class MatchPosting fills this
role. However, by associating additional information with a posting like the
number of times the term occurs in the document, we can turn it into a
[](cfish:lucy.ScorePosting), making it possible
ScorePosting, making it possible
to rank documents by relevance rather than just list documents which happen to
match in no particular order.

Expand Down
22 changes: 11 additions & 11 deletions core/Lucy/Document/Doc.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,11 @@ public class Lucy::Document::Doc inherits Clownfish::Obj {
Get_Doc_ID(Doc *self);

/** Store a field value in the Doc.
*
* @param field The field name
* @param value The value
*/
void
public void
Store(Doc *self, String *field, Obj *value);

/** Set the doc's field's attribute.
Expand All @@ -68,34 +71,31 @@ public class Lucy::Document::Doc inherits Clownfish::Obj {
public uint32_t
Get_Size(Doc *self);

/** Retrieve the field's value, or NULL if the field is not present. If
* the field is a text type, assign it to `target`. Otherwise,
* return the interior object. Callers must check to verify the kind of
* object returned.
/** Retrieve the field's value, or NULL if the field is not present.
*/
nullable incremented Obj*
public nullable incremented Obj*
Extract(Doc *self, String *field);

/** Return a list of names of all fields present.
*/
incremented Vector*
public incremented Vector*
Field_Names(Doc *self);

/* Unimplemented methods.
*/
public bool
Equals(Doc *self, Obj *other);

public void
void
Serialize(Doc *self, OutStream *outstream);

public incremented Doc*
incremented Doc*
Deserialize(decremented Doc *self, InStream *instream);

public incremented Hash*
incremented Hash*
Dump(Doc *self);

public incremented Doc*
incremented Doc*
Load(Doc *self, Obj *dump);

public void
Expand Down
8 changes: 4 additions & 4 deletions core/Lucy/Document/HitDoc.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,16 @@ public class Lucy::Document::HitDoc inherits Lucy::Document::Doc {
public bool
Equals(HitDoc *self, Obj *other);

public incremented Hash*
incremented Hash*
Dump(HitDoc *self);

public incremented HitDoc*
incremented HitDoc*
Load(HitDoc *self, Obj *dump);

public void
void
Serialize(HitDoc *self, OutStream *outstream);

public incremented HitDoc*
incremented HitDoc*
Deserialize(decremented HitDoc *self, InStream *instream);
}

Expand Down
2 changes: 1 addition & 1 deletion core/Lucy/Index/DataReader.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public class Lucy::Index::DataReader inherits Clownfish::Obj {
* performed upon either the reader or any component subreaders other than
* object destruction.
*/
public abstract void
abstract void
Close(DataReader *self);

public void
Expand Down
2 changes: 1 addition & 1 deletion core/Lucy/Index/DataWriter.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class Lucy::Index::DataWriter inherits Clownfish::Obj {
* @param doc_id Internal number assigned to this document within the
* segment.
*/
public abstract void
abstract void
Add_Inverted_Doc(DataWriter *self, Inverter *inverter, int32_t doc_id);

/** Add content from an existing segment into the one currently being
Expand Down
4 changes: 2 additions & 2 deletions core/Lucy/Index/DeletionsReader.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class Lucy::Index::PolyDeletionsReader nickname PolyDelReader
incremented Matcher*
Iterator(PolyDeletionsReader *self);

public void
void
Close(PolyDeletionsReader *self);

public void
Expand Down Expand Up @@ -89,7 +89,7 @@ class Lucy::Index::DefaultDeletionsReader nickname DefDelReader
nullable BitVector*
Read_Deletions(DefaultDeletionsReader *self);

public void
void
Close(DefaultDeletionsReader *self);

public void
Expand Down
10 changes: 5 additions & 5 deletions core/Lucy/Index/DeletionsWriter.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public abstract class Lucy::Index::DeletionsWriter nickname DelWriter

/** Delete the document identified in the PolyReader by the supplied id.
*/
public abstract void
abstract void
Delete_By_Doc_ID(DeletionsWriter *self, int32_t doc_id);

/** Returns true if there are updates that need to be written.
Expand All @@ -76,15 +76,15 @@ public abstract class Lucy::Index::DeletionsWriter nickname DelWriter
* With an offset of 1000, the array in the previous example would be
* { 1001, 0, 1002, 1003 }.
*/
public incremented I32Array*
incremented I32Array*
Generate_Doc_Map(DeletionsWriter *self, Matcher *deletions,
int32_t doc_max, int32_t offset);

/** Return a deletions iterator for the supplied SegReader, which must be
* a component within the PolyReader that was supplied at
* construction-time.
*/
public abstract incremented nullable Matcher*
abstract incremented nullable Matcher*
Seg_Deletions(DeletionsWriter *self, SegReader *seg_reader);

/** Return the number of deletions for a given segment.
Expand Down Expand Up @@ -130,13 +130,13 @@ class Lucy::Index::DefaultDeletionsWriter nickname DefDelWriter
public void
Delete_By_Query(DefaultDeletionsWriter *self, Query *query);

public void
void
Delete_By_Doc_ID(DefaultDeletionsWriter *self, int32_t doc_id);

public bool
Updated(DefaultDeletionsWriter *self);

public incremented nullable Matcher*
incremented nullable Matcher*
Seg_Deletions(DefaultDeletionsWriter *self, SegReader *seg_reader);

public int32_t
Expand Down
4 changes: 2 additions & 2 deletions core/Lucy/Index/DocReader.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class Lucy::Index::PolyDocReader inherits Lucy::Index::DocReader {
public incremented HitDoc*
Fetch_Doc(PolyDocReader *self, int32_t doc_id);

public void
void
Close(PolyDocReader *self);

public void
Expand Down Expand Up @@ -92,7 +92,7 @@ class Lucy::Index::DefaultDocReader nickname DefDocReader
void
Read_Record(DefaultDocReader *self, ByteBuf *buffer, int32_t doc_id);

public void
void
Close(DefaultDocReader *self);

public void
Expand Down
4 changes: 2 additions & 2 deletions core/Lucy/Index/DocVector.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ class Lucy::Index::DocVector nickname DocVec
Blob*
Field_Buf(DocVector *self, String *field);

public void
void
Serialize(DocVector *self, OutStream *outstream);

public incremented DocVector*
incremented DocVector*
Deserialize(decremented DocVector *self, InStream *instream);

public void
Expand Down
2 changes: 1 addition & 1 deletion core/Lucy/Index/DocWriter.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Lucy::Index::DocWriter inherits Lucy::Index::DataWriter {
init(DocWriter *self, Schema *schema, Snapshot *snapshot,
Segment *segment, PolyReader *polyreader);

public void
void
Add_Inverted_Doc(DocWriter *self, Inverter *inverter, int32_t doc_id);

public void
Expand Down
4 changes: 2 additions & 2 deletions core/Lucy/Index/HighlightReader.cfh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class Lucy::Index::PolyHighlightReader nickname PolyHLReader
public incremented DocVector*
Fetch_Doc_Vec(PolyHighlightReader *self, int32_t doc_id);

public void
void
Close(PolyHighlightReader *self);

public void
Expand Down Expand Up @@ -86,7 +86,7 @@ class Lucy::Index::DefaultHighlightReader nickname DefHLReader
Read_Record(DefaultHighlightReader *self, int32_t doc_id,
ByteBuf *buffer);

public void
void
Close(DefaultHighlightReader *self);

public void
Expand Down
Loading