293 changes: 293 additions & 0 deletions gnfs/relation.c
Expand Up @@ -638,6 +638,245 @@ static void nfs_get_cycle_relations(msieve_obj *obj,
mpz_clear(scratch);
}

/*------------------------------------------------------------------
Modification to msieve from FaaS
Struct: relcount_thread_t
This is a modified version of relcount_t. It allows for the storage
of a dependency flag. If the bit is set, then the relation belongs
to the dependency in question. This allows for the relations file to
be read in one pass, and have all of the relations copied to the
appropriate dependencies.
Used in nfs_get_cycle_relations_threaded() mainly.
-------------------------------------------------------------------*/

typedef struct {
uint32 relidx;
uint64 dep_flags;
} relcount_thread_t;

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: compare_relcount_thread()
This is a comparator that is used in the quicksort method. This
allows for the sorting of all of the relcount_thread_t structs
based on their relidx.
Used in nfs_get_cycle_relations_threaded()
-------------------------------------------------------------------*/

int compare_relcount_thread (const void * a, const void * b) {
return compare_uint32(&(((relcount_thread_t *) a)->relidx),
&(((relcount_thread_t *) b)->relidx));
}

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: nfs_get_cycle_relations_threaded()
This is a modified version of nfs_get_cycle_relations() that enables the
threading of the square root stage.
The key modification is that it creates lists of relations for each of
the dependencies in one pass of the .dat relation file. By creating all
of the dependency relations objects in one go, this allows us to save on
file IO time and to create threads for multiple dependencies at the same time.
-------------------------------------------------------------------*/

static void nfs_get_cycle_relations_threaded(msieve_obj *obj,
factor_base_t *fb, la_dep_t *dep_cycle_list,
relation_lists_t **dep_rel_list_out,
uint32 dep_lower,
uint32 dep_upper) {
uint32 i, j, k;
char buf[LINE_BUF_SIZE];
relation_lists_t *dep_rel_list;
savefile_t *savefile = &obj->savefile;

hashtable_t unique_relidx;
uint32 num_unique_relidx;
relcount_thread_t *relidx_list;
relcount_thread_t *entry;

uint8 tmp_factors[COMPRESSED_P_MAX_SIZE];
uint32 factor_size;
relation_t tmp_relation;
mpz_t scratch;

tmp_relation.factors = tmp_factors;

hashtable_init(&unique_relidx,
(uint32)WORDS_IN(relcount_thread_t),
(uint32)1);

/* Fill the hashtable using the relation ids for each dependency obtained
in read_cycles_threaded(). If the relation has already been added, simply
toggle the bit in relcount_thread_t belonging to the struct.
Doing this also neatly "squeezes" out the relations that occurs an
even number of times.
(author: not entirely sure why "squeezing" is necessary, but the same
operation is performed in nfs_get_cycle_relations(), so I figured it
should be included) */

for (i = dep_lower; i <= dep_upper; i++) {
la_dep_t *dep = dep_cycle_list + i - dep_lower;
for (j = 0; j < dep->num_cycles; j++) {
la_col_t *c = dep->column + j;
uint32 num_relations = c->cycle.num_relations;
uint32 *list = c->cycle.list;

for (k = 0; k < num_relations; k++) {
uint32 already_seen;
entry = (relcount_thread_t *)hashtable_find(
&unique_relidx, list + k,
NULL, &already_seen);
if (!already_seen)
entry->dep_flags = 1 << (i - 1);
else
entry->dep_flags ^= 1 << (i - 1);
}
}
}

/* Convert the internal list of hashtable entries into
a list of relcount_thread_t. Discard all relcount_thread_t where the
dependency flag is 0 (it has been squeezed out from all deps) */

hashtable_close(&unique_relidx);
num_unique_relidx = hashtable_get_num(&unique_relidx);
entry = (relcount_thread_t *)hashtable_get_first(&unique_relidx);
relidx_list = (relcount_thread_t *)unique_relidx.match_array;

for (i = j = 0; i < num_unique_relidx; i++) {
if (entry->dep_flags != 0)
relidx_list[j++] = *entry;

entry = (relcount_thread_t *)hashtable_get_next(
&unique_relidx, entry);
}
num_unique_relidx = j;

/* sort the list in order of increasing relation number */
qsort(relidx_list, (size_t)num_unique_relidx,
sizeof(relcount_thread_t), compare_relcount_thread);

logprintf(obj, "Sqrt: cycles contain %u unique relations\n",
num_unique_relidx);

savefile_open(savefile, SAVEFILE_READ);

/* assign space for relation lists for each dependency */
dep_rel_list = (relation_lists_t *)xcalloc((size_t)(dep_upper
- dep_lower + 1),
sizeof(relation_lists_t));

for (i = dep_lower; i <= dep_upper; i++) {
relation_lists_t *rlists;

rlists = dep_rel_list + i - dep_lower;
rlists->rlist = (relation_t *)xcalloc((size_t) num_unique_relidx,
sizeof(relation_t));
rlists->dep_no = i;
rlists->num_relations = 0;
}

/* read the list of relations */

i = (uint32)(-1);
j = 0;
savefile_read_line(buf, sizeof(buf), savefile);
mpz_init(scratch);
while (!savefile_eof(savefile) && j < num_unique_relidx) {

int32 status;

if (buf[0] != '-' && !isdigit(buf[0])) {

/* no relation at this line */

savefile_read_line(buf, sizeof(buf), savefile);
continue;
}
if (++i < relidx_list[j].relidx) {

/* relation not needed */

savefile_read_line(buf, sizeof(buf), savefile);
continue;
}

status = nfs_read_relation(buf, fb, &tmp_relation,
&factor_size, 0,
scratch, 0);
if (status) {
/* at this point, if the relation couldn't be
read then the filtering stage should have
found that out and skipped it */

logprintf(obj, "error: relation %u corrupt\n", i);
exit(-1);
}
else {

/* Iterate through all of the dependencies.
Save the relation if it matches the dependency */
for (k = dep_lower; k <= dep_upper; k++) {
if (relidx_list[j].dep_flags & (1 << (k - 1))) {
relation_lists_t *rlists;
relation_t *r;

rlists = dep_rel_list + k - dep_lower;
r = rlists->rlist + rlists->num_relations;

*r = tmp_relation;
r->rel_index = i;
r->factors = (uint8 *)xmalloc(factor_size *
sizeof(uint8));
memcpy(r->factors, tmp_relation.factors,
factor_size * sizeof(uint8));

rlists->num_relations++;
}
}
j++;
}

savefile_read_line(buf, sizeof(buf), savefile);
}

num_unique_relidx = j;
logprintf(obj, "Sqrt: read %u relations in total\n", j);

/* Reallocate memory for each dependency, as no dependency is likely to
have each relation in it */
for (i = dep_lower; i <= dep_upper; i++) {
relation_lists_t *rlists;

rlists = dep_rel_list + i - dep_lower;
rlists->rlist = (relation_t *)xrealloc(rlists->rlist,
(size_t) (rlists->num_relations * sizeof(relation_t)));
logprintf(obj, "Sqrt: Dependency %u has %u relations\n", i,
rlists->num_relations);
}

savefile_close(savefile);
hashtable_free(&unique_relidx);
*dep_rel_list_out = dep_rel_list;
mpz_clear(scratch);
}

/*--------------------------------------------------------------------*/
void nfs_read_cycles(msieve_obj *obj,
factor_base_t *fb,
Expand Down Expand Up @@ -708,6 +947,60 @@ void nfs_read_cycles(msieve_obj *obj,
}
}

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: nfs_read_cycles_threaded()
This is a modified version of nfs_read_cycles() that enables the
threading of the square root stage.
Using the modified read_cycles_threaded() and
nfs_get_cycle_relations_threaded(), it allows us to create lists of
relations for each of the dependencies in one pass of the .cyc cycle
file and the .dat relation file.
By creating all of the dependency relations objects in one go,
this allows us to save on file IO time and to create threads for
multiple dependencies at the same time.
-------------------------------------------------------------------*/

void nfs_read_cycles_threaded(msieve_obj *obj,
factor_base_t *fb,
relation_lists_t **rlists,
uint32 *dep_lower,
uint32 *dep_upper) {

int i;
la_dep_t *dep_cycle_list = NULL;
relation_lists_t *dep_rel_list = NULL;

/* read the raw list of relation numbers for each cycle and dependency */
read_cycles_threaded(obj, &dep_cycle_list, dep_lower, dep_upper);

if (dep_cycle_list == NULL) {
*rlists = NULL;
return;
}

/* now read the list of relations needed by the
list of cycles */
nfs_get_cycle_relations_threaded(obj, fb, dep_cycle_list,
&dep_rel_list, *dep_lower, *dep_upper);

/* need to free dep_cycle_list */

for (i = *dep_lower; i <= *dep_upper; i++) {
la_dep_t *dep = dep_cycle_list + i - *dep_lower;
free_cycle_list(dep->column, dep->num_cycles);
}
free(dep_cycle_list);

*rlists = dep_rel_list;
}

/*--------------------------------------------------------------------*/
void nfs_free_relation_list(relation_t *rlist, uint32 num_relations) {

Expand Down
738 changes: 738 additions & 0 deletions gnfs/sqrt/sqrt.c

Large diffs are not rendered by default.

176 changes: 176 additions & 0 deletions gnfs/sqrt/sqrt.h
Expand Up @@ -16,11 +16,187 @@ benefit from your work.
#define _GNFS_SQRT_SQRT_H_

#include "gnfs.h"
#include <thread.h>

#ifdef __cplusplus
extern "C" {
#endif

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_fb_light_init()
This is performs a light initialization of the mpz_poly structs
in the factor_base_t.
Used only in sqrt_data_init()
-------------------------------------------------------------------*/

void sqrt_fb_light_init (factor_base_t *fb);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_fb_light_copy_a_r_poly()
This is performs a light copy of the mpz_poly structs
in the factor_base_t.
Used only in sqrt_data_deep_copy()
-------------------------------------------------------------------*/

void sqrt_fb_light_copy_a_r_poly (factor_base_t *dst, factor_base_t *src);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Struct: sqrt_data
Contains all of the data necessary for the sqrt stage, to be passed
to each thread.
-------------------------------------------------------------------*/

typedef struct {
uint32 check_q;
factor_base_t fb;
mpz_poly_t monic_alg_poly;
mpz_poly_t *rpoly;
mpz_poly_t *apoly;
mpz_t exponent, sqrt_r, sqrt_a;
mpz_t c, tmp1, tmp2;
} sqrt_data;

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_data_init()
This is performs a initialization of the sqrt_data struct.
Used in sqrt_data_deep_copy() and nfs_find_factors_threaded().
-------------------------------------------------------------------*/

void sqrt_data_init (sqrt_data *dat);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_data_deep_copy()
This is performs a deep copy of the sqrt_data struct. Only pass
in an uninitialized, but memory allocated, sqrt_data pointer as
dst. The current method does not do the correct checking to
ensure memory-safety otherwise.
Used in nfs_find_factors_threaded() in order to make a copy of
sqrt_data for each of the threads.
-------------------------------------------------------------------*/

void sqrt_data_deep_copy (sqrt_data *dst, sqrt_data *src);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: free_sqrt_data()
Frees sqrt_data.
Used in nfs_find_factors_threaded() and sqrt_thread_shutdown()
-------------------------------------------------------------------*/

void free_sqrt_data (sqrt_data *dat);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Struct: sqrt_thread_data
Contains all of the data necessary for each thread to compute its
dependency.
-------------------------------------------------------------------*/

typedef struct {
pthread_mutex_t *factor_found_mutex;
pthread_mutex_t *status_mutex;
pthread_cond_t *status_cond;
pthread_mutex_t *count_mutex;
int *status;
int *count;
uint32 num_relations;
uint32 num_free_relations;
relation_t *rlist;
abpair_t *abpairs;
sqrt_data *dat;
uint32 dep_no;
msieve_obj *obj;
mpz_t n;
factor_list_t *factor_list;

} sqrt_thread_data;

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_thread_data_init()
Initializes the data object required by each thread.
Used in nfs_find_factors_threaded().
-------------------------------------------------------------------*/

void sqrt_thread_data_init(msieve_obj *obj,
sqrt_thread_data *thread_dat,
sqrt_data *dat, relation_lists_t *rl,
pthread_mutex_t *factor_found_mutex,
pthread_mutex_t *status_mutex,
pthread_cond_t *status_cond, int *status, pthread_mutex_t *count_mutex,
int *count, mpz_t n, factor_list_t *factor_list);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: free_sqrt_thread_data()
Frees the thread data object upon completion of the thread.
Used in nfs_find_factors_threaded() and sqrt_thread_shutdown().
-------------------------------------------------------------------*/

void free_sqrt_thread_data(sqrt_thread_data *data);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: sqrt_thread_shutdown()
Performs necessary clean up of thread data after the task is complete.
Used in nfs_find_factors_threaded().
-------------------------------------------------------------------*/

void sqrt_thread_shutdown(void *arg, int thread_num);

uint32 get_prime_for_sqrt(mpz_poly_t *alg_poly,
uint32 min_value,
uint32 *q_out);
Expand Down
46 changes: 46 additions & 0 deletions include/common.h
Expand Up @@ -284,6 +284,26 @@ typedef struct {
la_cycle_t cycle; /* list of relations comprising this column */
} la_col_t;

/*------------------------------------------------------------------
Modification to msieve from FaaS
Struct: la_dep_t
This struct allows for a list of dependencies to be created, each
containing a list of cycles.
Used to collect cycles in read_cycles_threaded() and then relations
in nfs_get_cycle_relations_threaded().
-------------------------------------------------------------------*/

typedef struct {
la_col_t *column;
uint32 num_cycles;
uint32 curr_cycle;
} la_dep_t;

/* merge src1[] and src2[] into merge_array[], assumed
large enough to hold the merged result. Return the
final number of elements in merge_array */
Expand Down Expand Up @@ -335,6 +355,32 @@ void read_cycles(msieve_obj *obj,
uint32 dependency,
uint32 *colperm);

/*------------------------------------------------------------------
Modification to msieve from FaaS
Method: read_cycles_threaded()
This is a modified version of read_cycles() that enables the
threading of the square root stage.
The key modification is that it creates lists of cycles (and relation
ids within them) for each of the dependencies in one pass of the .cyc
cycle file.
It also takes in uint32 pointers for dep_lower and dep_upper. This
allows for the modification of dep_lower and dep_upper, as some of
the dependencies will not contain any cycles. This differs from the
original code, which due to its sequential nature would normally "hit"
a "good" dependency before running out of dependencies.
-------------------------------------------------------------------*/

void read_cycles_threaded(msieve_obj *obj,
la_dep_t **dep_cycle_list_out,
uint32 *dep_lower,
uint32 *dep_upper);

/*-------------- MISCELLANEOUS STUFF ----------------------------------*/

#define POSITIVE 0
Expand Down