Skip to content

Commit

Permalink
Ability to process delimited lists, better memory management.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexdamour committed Jun 5, 2010
1 parent e3b692a commit 655cb06
Show file tree
Hide file tree
Showing 26 changed files with 701 additions and 500 deletions.
6 changes: 3 additions & 3 deletions single/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ TRAIN_OBJS=sqlite_db_local.o db.o util.o train.o train_main.o compfun.o comp_eng
train: $(TRAIN_OBJS)
$(CC) -o $@ $(TRAIN_OBJS) $(LIBS)

# Objects linked into the `dump` program.
# NOTE(review): DUMP_OBJS is assigned twice in a row; the second assignment
# additionally lists blockfun.o. This looks like the before/after pair of a
# diff whose +/- markers were lost -- confirm which line the real Makefile keeps.
DUMP_OBJS=sqlite_db_local.o db.o util.o dump_primary.o
DUMP_OBJS=sqlite_db_local.o db.o util.o dump_primary.o blockfun.o
# Link the dump executable from DUMP_OBJS plus $(LIBS).
dump: $(DUMP_OBJS)
$(CC) -o $@ $(DUMP_OBJS) $(LIBS)

Expand All @@ -67,7 +67,7 @@ clear: $(CLEAR_OBJS)
$(CC) -o $@ $(CLEAR_OBJS) $(LIBS)

# Remove built programs, object/core files and the generated
# sqlite_db_local.[ch] sources.
# NOTE(review): two rm commands are listed. In the second one everything from
# "#primary" onward is commented out, so primary, block_idx, idx, simprof,
# tset0* and xset0* would be left in place. Presumably the first command is the
# pre-commit line from the diff -- confirm against the repository.
clean distclean realclean:
rm -rf $(PROGS) TESTDIR eBay tags *.o *.core sqlite_db_local.[ch] primary block_idx idx simprof tset0* xset0*
rm -rf $(PROGS) TESTDIR eBay tags *.o *.core sqlite_db_local.[ch] #primary block_idx idx simprof tset0* xset0*

primaryclean:
rm -rf primary block_idx idx
Expand Down Expand Up @@ -106,7 +106,7 @@ comp_engine.o comp_spec.o: sqlite_db_local.h sqlite_db.h strcmp95.h comp_spec.h
# Regenerate sqlite_db_local.c / sqlite_db_local.h by running sqlite_db_code
# on data.desc.
sqlite_db_local.c sqlite_db_local.h: sqlite_db_code data.desc
./sqlite_db_code -v -c sqlite_db_local.c -h sqlite_db_local.h -f data.desc

# Regenerate the comparison-spec sources by running sp_code on sp.desc.
# NOTE(review): two target lists are shown; the second also names train.h.
# Presumably the first is the pre-commit line from the diff -- confirm.
comp_spec.c comp_spec.h compfun.c: sp_code sp.desc
comp_spec.c comp_spec.h compfun.c train.h: sp_code sp.desc
./sp_code -v -f sp.desc

lint_code:
Expand Down
23 changes: 14 additions & 9 deletions single/blockfun.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,28 @@
datatype block_type = STRING;

/*
 * Secondary-key callback: derives a blocking key from the DbRecord stored in
 * `data` and hands it back through `result`. Always returns 0.
 *
 * db_secondary, key  not used (see the self-assignments below)
 * data               data->data is read as a DbRecord
 * result             receives the key buffer and its size; DB_DBT_APPMALLOC
 *                    is OR-ed into result->flags because the buffer comes
 *                    from malloc() here
 *
 * NOTE(review): this listing comes from a rendered diff ("14 additions & 9
 * deletions") and seems to contain the removed finit_lname lines next to the
 * added fname_lname lines. Read literally, the finit_lname buffer is sized
 * from strlen(Lastname) but filled using strlen(Block2), and it is never
 * freed once result->data is overwritten with fname_lname. Confirm against
 * the repository; presumably only the fname_lname lines belong to the
 * committed function.
 * NOTE(review): neither malloc() result is checked for NULL.
 */
int blocking_callback(DB* db_secondary, const DBT* key, const DBT* data, DBT* result){
char* finit_lname;
char* fname_lname;
size_t len_lname;
size_t len_fname;
DbRecord *recordp = (DbRecord*)data->data;

// Self-assignments: presumably here only to silence unused-parameter warnings.
db_secondary=db_secondary;
key=key;

// "<first initial>.<Lastname>" variant: 1 initial + '.' + name + NUL = len+3 bytes.
len_lname = strlen(recordp->Lastname);
finit_lname = (char*) malloc(sizeof(char)*len_lname+3);
len_fname = strlen(recordp->Block1);
//printf("%s, %u, ", recordp->Block1, len_fname);
// len_lname is reassigned here; later uses see the length of Block2.
len_lname = strlen(recordp->Block2);
//printf("%s, %u, ", recordp->Block2, len_lname);
// "<Block1>.<Block2>" variant: both strings + '.' + NUL = len_fname+len_lname+2 bytes.
fname_lname = (char*) malloc(sizeof(char)*len_lname+sizeof(char)*len_fname+2);

finit_lname[0]=recordp->Firstname[0];
finit_lname[1]='.';
memcpy(finit_lname+2, recordp->Lastname, len_lname);
finit_lname[len_lname+2]='\0';
memcpy(fname_lname,recordp->Block1, len_fname);
fname_lname[len_fname]='.';
memcpy(fname_lname+len_fname+1, recordp->Block2, len_lname);
fname_lname[len_lname+len_fname+1]='\0';
//printf("%s\n", fname_lname);

// Both assignments below are overwritten by the fname_lname pair that follows.
result->data = finit_lname;
result->size = sizeof(char)*len_lname+3;
// The reported size includes the terminating NUL.
result->data = fname_lname;
result->size = sizeof(char)*len_lname+sizeof(char)*len_fname+2;
result->flags = result->flags | DB_DBT_APPMALLOC;
return(0);
}
33 changes: 27 additions & 6 deletions single/comp_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,46 @@
/*
 * Fills the similarity profile `sp` by comparing rec1 and rec2 component by
 * component. Always returns 0.
 *
 * For each i in [0, NUM_COMPS): extract() pulls the field selected by
 * extract_idxs[i] from each record, the pair is scored with comp_funcs[i]
 * (through listcmp() when extract() reports an array), and the int result is
 * stored at byte offset sp_offsets[i] inside *sp.
 *
 * NOTE(review): this listing comes from a rendered diff ("27 additions & 6
 * deletions") and seems to contain removed lines next to their replacements:
 * i/freeme and sz are declared twice, extract() is called with both four and
 * seven arguments, and the profile slot is written both before and after the
 * is_array test. Confirm against the repository; presumably only the
 * seven-argument extract() path belongs to the committed function.
 */
int compare_records(DBT *rec1, DBT *rec2, simprof *sp){
void *arg1, *arg2;
int (*cfunc)(const void *, const void*, size_t);
int i, freeme;
size_t sz;
int i, freeme, is_array;
size_t sz, array_len_1, array_len_2, array_len_max;

//DbRecord_dump((DbRecord*)rec1->data);
//DbRecord_dump((DbRecord*)rec2->data);

for(i=0; i < NUM_COMPS; ++i){
cfunc = comp_funcs[i];
freeme = extract((DbRecord*)rec1->data, extract_idxs[i], &arg1, &sz);
freeme = extract((DbRecord*)rec2->data, extract_idxs[i], &arg2, &sz);
*(int*)((char*)sp+sp_offsets[i]) = cfunc(arg1, arg2, sz);
// freeme, sz, is_array and array_len_max are overwritten by the second
// call, so only the values reported for rec2 are used below.
freeme = extract((DbRecord*)rec1->data, extract_idxs[i], &arg1, &sz, &is_array, &array_len_1, &array_len_max);
freeme = extract((DbRecord*)rec2->data, extract_idxs[i], &arg2, &sz, &is_array, &array_len_2, &array_len_max);
// The profile slot is written as an int regardless of the member's enum type.
if(is_array)
*(int*)((char*)sp+sp_offsets[i]) = listcmp(arg1, arg2, sz, cfunc, array_len_1, array_len_2, array_len_max);
else
*(int*)((char*)sp+sp_offsets[i]) = cfunc(arg1, arg2, sz);
// printf("\tfinal_res: %d\n", *(int*)((char*)sp+sp_offsets[i]));
// A non-zero freeme releases both extracted arguments.
if(freeme){
free(arg1);
free(arg2);
}
}
//simprof_dump(sp);
// simprof_dump(sp);

return(0);
}

/*
 * listcmp -- best pairwise score between two arrays of fixed-size elements.
 *
 * arg1, arg2      base addresses of the two arrays
 * size            size in bytes of one element; also forwarded to cfunc
 * cfunc           comparison applied to one element from each array
 * array_len_1/_2  element counts; a count of 0 is treated as 1, so the first
 *                 element of that side is still compared
 * array_len       unused; kept so existing callers keep compiling
 *
 * Returns the largest value cfunc produced over all pairs, and never less
 * than 0 because the running best starts at 0.
 *
 * cfunc is invoked exactly once per pair: its result is stored in a local
 * before being compared, rather than being passed as an argument to a
 * max-style macro that could evaluate it twice.
 */
int listcmp(const void* arg1, const void* arg2, size_t size,
            int (*cfunc)(const void *, const void*, size_t), size_t array_len_1, size_t array_len_2, size_t array_len){
    const char *left = arg1;
    const char *right = arg2;
    /* Loop bounds are fixed up front; an empty side still yields one comparison. */
    const size_t n_left = array_len_1 ? array_len_1 : 1;
    const size_t n_right = array_len_2 ? array_len_2 : 1;
    int best = 0;
    size_t i, j;

    (void)array_len;

    for(i = 0; i < n_left; ++i){
        for(j = 0; j < n_right; ++j){
            const int score = cfunc(left + size*i, right + size*j, size);
            if(score > best)
                best = score;
        }
    }

    return best;
}

int simprof_dump(simprof* sp){
int i;
printf("(");
Expand Down
2 changes: 2 additions & 0 deletions single/comp_engine.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "comp_spec.h"

/* Compares two records and writes one result per component into the simprof. */
extern int compare_records(DBT *, DBT *, simprof *);
/*
 * Pairwise comparison of two element arrays with the supplied comparison
 * function. Arguments: array 1, array 2, element size, comparison function,
 * length of array 1, length of array 2, and a further length.
 */
int listcmp(const void *, const void *, size_t, int (*)(const void *, const void*, size_t), size_t, size_t, size_t);

//From comp_spec.c
int num_comps;
Expand All @@ -10,6 +11,7 @@ size_t sp_offsets[];

char* has_tag(DbRecord*);
int apply_tag(DbRecord*, char*);
int tagcmp(DbRecord*, DbRecord*);

int stop_comp(DbRecord*, DbRecord*);

Expand Down
6 changes: 3 additions & 3 deletions single/comp_spec.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
/*
 * Comparison function per component. compare_records() indexes this table
 * with the same i it uses for extract_idxs[] and sp_offsets[], so the three
 * tables must stay in the same order.
 *
 * NOTE(review): this hunk is reported as "3 additions & 3 deletions" across
 * the three tables, so one of the jwcmp entries below is presumably the
 * removed line and another the added one -- confirm the real entry list.
 */
int (*comp_funcs[])(const void*, const void*, size_t) = {
jwcmp,
midnamecmp,
jwcmp,
distcmp,
disttypecmp,
asgcmp,
jwcmp,
classcmp,
coauthcmp,
};
Expand All @@ -22,10 +22,10 @@ int (*comp_funcs[])(const void*, const void*, size_t) = {
/*
 * Field selector per component, passed by compare_records() as the second
 * argument of extract(). Parallel to comp_funcs[] and sp_offsets[].
 *
 * NOTE(review): SQLITE_DB_INDX_LASTNAME and SQLITE_DB_INDX_LAW_ID look like
 * the removed/added pair of this diff hunk -- confirm which one is current.
 */
int extract_idxs[] = {
SQLITE_DB_INDX_FIRSTNAME,
SQLITE_DB_INDX_FIRSTNAME,
SQLITE_DB_INDX_LASTNAME,
LATLON,
SQLITE_DB_INDX_COUNTRY,
ASG_FIELDS,
SQLITE_DB_INDX_LAW_ID,
SQLITE_DB_INDX_CLASS,
SQLITE_DB_INDX_COAUTHS,
};
Expand All @@ -34,10 +34,10 @@ int extract_idxs[] = {
/*
 * Byte offset of the simprof member that receives each component's result;
 * compare_records() adds sp_offsets[i] to the simprof base address.
 * Parallel to comp_funcs[] and extract_idxs[].
 *
 * NOTE(review): SP_OFFSET(lname) and SP_OFFSET(firm) look like the
 * removed/added pair of this diff hunk -- confirm which one is current.
 */
size_t sp_offsets[] = {
SP_OFFSET(fname),
SP_OFFSET(midname),
SP_OFFSET(lname),
SP_OFFSET(dist),
SP_OFFSET(dt),
SP_OFFSET(asg),
SP_OFFSET(firm),
SP_OFFSET(cl),
SP_OFFSET(coauths),
};
Expand Down
11 changes: 7 additions & 4 deletions single/comp_spec.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include <math.h>
#include "strcmp95.h"

#define TRIPLET_ON 1
#define LIK_CUTOFF 0.95

/*
 * Extra selectors used in extract_idxs[] next to the SQLITE_DB_INDX_* values.
 * They are numbered from SQLITE_DB_NUMFIELDS upward (presumably so they cannot
 * collide with a real field index -- confirm against the generated header).
 * Both expansions are parenthesized so they stay a single operand inside
 * larger expressions such as `2 * ASG_FIELDS`.
 */
#define LATLON (SQLITE_DB_NUMFIELDS)
#define ASG_FIELDS (SQLITE_DB_NUMFIELDS+1)

Expand All @@ -33,7 +36,7 @@ typedef struct {
/* End custom header from sp.desc file. */

/* Define result space levels. */
/*
 * NOTE(review): jwres is typedef'd twice below with different enumerators.
 * This looks like the before/after pair of a diff whose +/- markers were
 * lost; only one of the two can exist in the real header -- confirm which.
 */
typedef enum {JWSUB75,JWMISSING,JW75,JW85,JW95,JW100} jwres;
typedef enum {JWSUB33,JWMISSING,JW66,JW100,JW100MULT,JW100MULTFULL} jwres;
/* In each enum below the *MISS/*MISSING level is the second enumerator (value 1). */
typedef enum {DIST100PLUS,DISTMISSING,DIST100,DIST75,DIST50,DIST10,DIST0} distres;
typedef enum {NO_STREET,HAVE_STREET} disttype;
typedef enum {CLASS0,CLASSMISS,CLASS25,CLASS50,CLASS75PLUS} classres;
Expand All @@ -44,10 +47,10 @@ typedef enum {M0,MMISSING,M33,M67,M100} midnameres;
/*
 * Similarity profile: one result level per comparison component.
 * compare_records() writes each member through an int pointer at the offset
 * listed in sp_offsets[], so the member set must match that table.
 *
 * NOTE(review): `lname` and `firm` look like the removed/added pair of this
 * diff hunk -- confirm which member the real struct contains.
 */
typedef struct __simprof {
jwres fname;
midnameres midname;
jwres lname;
distres dist;
disttype dt;
jwres asg;
jwres firm;
classres cl;
coauthres coauths;
} simprof;
Expand All @@ -59,14 +62,14 @@ size_t sp_offsets[];

/* Custom function prototypes. */
/* Extractor function. */
/*
 * NOTE(review): two incompatible extract() prototypes are listed; they look
 * like the before/after pair of a diff whose +/- markers were lost. The
 * seven-argument form matches the newer calls in compare_records(), which
 * pass the record, a field selector, and out-pointers for the value, its
 * size, an is-array flag and two lengths, and treat a non-zero return as
 * "caller must free the value".
 */
int extract(DbRecord*, const int, void**, size_t*);
int extract(DbRecord*, const int, void**, size_t*, int*, size_t*, size_t*);
/* Comparison functions. */
/*
 * All share the signature stored in comp_funcs[]. jwcmp is declared more than
 * once; repeated identical declarations are harmless in C, and one of them is
 * presumably a line removed by the diff.
 */
int jwcmp(const void*, const void*, size_t);
int midnamecmp(const void*, const void*, size_t);
int jwcmp(const void*, const void*, size_t);
int distcmp(const void*, const void*, size_t);
int disttypecmp(const void*, const void*, size_t);
int asgcmp(const void*, const void*, size_t);
int jwcmp(const void*, const void*, size_t);
int classcmp(const void*, const void*, size_t);
int coauthcmp(const void*, const void*, size_t);

Expand Down
Loading

0 comments on commit 655cb06

Please sign in to comment.