Skip to content
Browse files

new tag with bluegene plugin exactly like it is in the 1.3 trunk as of 11:46 on 9-23-08
  • Loading branch information...
1 parent 0b6aae3 commit 33049cabc1e827e1d4b6a2e34d2bb50db3147efb @dannyauble dannyauble committed
View
2 META
@@ -11,7 +11,7 @@
Minor: 3
Micro: 8
Version: 1.3.8
- Release: 1
+ Release: 3
API_CURRENT: 13
API_AGE: 0
API_REVISION: 0
View
10 src/common/slurm_accounting_storage.h
@@ -212,7 +212,8 @@ typedef struct {
} acct_qos_cond_t;
typedef struct {
- acct_admin_level_t admin_level;
+ uint16_t admin_level; /* really acct_admin_level_t but for
+ packing purposes needs to be uint16_t */
acct_association_cond_t *assoc_cond; /* use user_list here for
names */
List def_acct_list; /* list of char * */
@@ -228,7 +229,8 @@ typedef struct {
* src/slurmdbd/proc_req.c.
*/
typedef struct {
- acct_admin_level_t admin_level;
+ uint16_t admin_level; /* really acct_admin_level_t but for
+ packing purposes needs to be uint16_t */
List assoc_list; /* list of acct_association_rec_t *'s */
List coord_accts; /* list of acct_coord_rec_t *'s */
char *default_acct;
@@ -256,7 +258,9 @@ typedef struct {
typedef struct {
List objects; /* depending on type */
- acct_update_type_t type;
+ uint16_t type; /* really acct_update_type_t but for
+ * packing purposes needs to be a
+ * uint16_t */
} acct_update_object_t;
typedef struct {
View
2 src/common/slurm_protocol_pack.c
@@ -1092,7 +1092,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
(set_debug_level_msg_t **)&(msg->data), buffer);
break;
case ACCOUNTING_UPDATE_MSG:
- _unpack_accounting_update_msg(
+ rc = _unpack_accounting_update_msg(
(accounting_update_msg_t **)&msg->data,
buffer);
break;
View
1,134 src/plugins/select/bluegene/block_allocator/block_allocator.c
389 additions, 745 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
35 src/plugins/select/bluegene/plugin/bg_job_place.c
@@ -380,7 +380,7 @@ static bg_record_t *_find_matching_block(List block_list,
test_only);
itr = list_iterator_create(block_list);
- while ((bg_record = (bg_record_t*) list_next(itr))) {
+ while ((bg_record = list_next(itr))) {
/* If test_only we want to fall through to tell the
scheduler that it is runnable just not right now.
*/
@@ -636,6 +636,8 @@ static int _check_for_booted_overlapping_blocks(
* bg_record
*/
list_remove(bg_record_itr);
+ slurm_mutex_lock(&block_state_mutex);
+
if(bg_record->original) {
debug3("This was a copy");
found_record =
@@ -651,8 +653,10 @@ static int _check_for_booted_overlapping_blocks(
}
destroy_bg_record(bg_record);
if(!found_record) {
- error("1 this record wasn't "
- "found in the list!");
+ debug2("This record wasn't "
+ "found in the bg_list, "
+ "no big deal, it "
+ "probably wasn't added");
//rc = SLURM_ERROR;
} else {
List temp_list =
@@ -663,6 +667,7 @@ static int _check_for_booted_overlapping_blocks(
free_block_list(temp_list);
list_destroy(temp_list);
}
+ slurm_mutex_unlock(&block_state_mutex);
}
rc = 1;
@@ -1228,7 +1233,7 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap,
uint16_t tmp16 = (uint16_t)NO_VAL;
List block_list = NULL;
int blocks_added = 0;
- int starttime = time(NULL);
+ time_t starttime = time(NULL);
bool test_only;
if (mode == SELECT_MODE_TEST_ONLY || mode == SELECT_MODE_WILL_RUN)
@@ -1380,7 +1385,7 @@ extern int test_job_list(List req_list)
// uint16_t tmp16 = (uint16_t)NO_VAL;
List block_list = NULL;
int blocks_added = 0;
- int starttime = time(NULL);
+ time_t starttime = time(NULL);
ListIterator itr = NULL;
select_will_run_t *will_run = NULL;
@@ -1420,10 +1425,22 @@ extern int test_job_list(List req_list)
if(rc == SLURM_SUCCESS) {
if(bg_record) {
- if(bg_record->job_ptr
- && bg_record->job_ptr->end_time) {
- starttime =
- bg_record->job_ptr->end_time;
+ /* Here we see if there is a job running since
+ * some jobs take awhile to finish we need to
+ * make sure the time of the end is in the
+ * future. If it isn't (meaning it is in the
+ * past or current time) we add 5 seconds to
+ * it so we don't use the block immediately.
+ */
+ if(bg_record->job_ptr
+ && bg_record->job_ptr->end_time) {
+ if(bg_record->job_ptr->end_time <=
+ starttime)
+ starttime += 5;
+ else {
+ starttime = bg_record->
+ job_ptr->end_time;
+ }
}
bg_record->job_running =
will_run->job_ptr->job_id;
View
18 src/plugins/select/bluegene/plugin/bg_job_run.c
@@ -546,7 +546,7 @@ static void _start_agent(bg_update_t *bg_update_ptr)
slurm_strerror(rc));
job_fail(bg_update_ptr->job_ptr->job_id);
}
- lock_slurmctld(job_write_lock);
+ unlock_slurmctld(job_write_lock);
slurm_mutex_unlock(&job_start_mutex);
return;
@@ -1183,7 +1183,21 @@ extern int boot_block(bg_record_t *bg_record)
!= STATUS_OK) {
error("bridge_create_block(%s): %s",
bg_record->bg_block_id, bg_err_str(rc));
-
+ if(rc == INCOMPATIBLE_STATE) {
+ char reason[128], time_str[32];
+ time_t now = time(NULL);
+ slurm_make_time_str(&now, time_str, sizeof(time_str));
+ snprintf(reason, sizeof(reason),
+ "boot_block: "
+ "Block %s is in an incompatable state. "
+ "This usually means hardware is allocated "
+ "by another block (maybe outside of SLURM). "
+ "[SLURM@%s]",
+ bg_record->bg_block_id, time_str);
+ drain_as_needed(bg_record, reason);
+ bg_record->boot_state = 0;
+ bg_record->boot_count = 0;
+ }
return SLURM_ERROR;
}
View
1 src/plugins/select/bluegene/plugin/bluegene.c
@@ -488,7 +488,6 @@ extern bg_record_t *find_and_remove_org_from_bg_list(List my_list,
if(bit_equal(bg_record->bitmap, found_record->bitmap)
&& bit_equal(bg_record->ionode_bitmap,
found_record->ionode_bitmap)) {
-
if(!strcmp(bg_record->bg_block_id,
found_record->bg_block_id)) {
list_remove(itr);
View
2 src/plugins/select/bluegene/plugin/dynamic_block.c
@@ -232,7 +232,7 @@ extern List create_dynamic_block(List block_list,
else
results = list_create(NULL);
if (!allocate_block(request, results)) {
- debug("allocate failure for size %d base partitions",
+ debug2("allocate failure for size %d base partitions",
request->size);
rc = SLURM_ERROR;
}
View
7 src/slurmctld/partition_mgr.c
@@ -1093,11 +1093,12 @@ uid_t *_get_group_members(char *group_name)
}
}
- setpwent();
#ifdef HAVE_AIX
+ setpwent_r(&fp);
while (!getpwent_r(&pw, pw_buffer, PW_BUF_SIZE, &fp)) {
pwd_result = &pw;
#else
+ setpwent();
while (!getpwent_r(&pw, pw_buffer, PW_BUF_SIZE, &pwd_result)) {
#endif
if (pwd_result->pw_gid != my_gid)
@@ -1106,7 +1107,11 @@ uid_t *_get_group_members(char *group_name)
xrealloc(group_uids, ((j+1) * sizeof(uid_t)));
group_uids[j-1] = pwd_result->pw_uid;
}
+#ifdef HAVE_AIX
+ endpwent_r(&fp);
+#else
endpwent();
+#endif
return group_uids;
}
View
2 src/slurmd/slurmd/req.c
@@ -3193,7 +3193,7 @@ init_gids_cache(int cache)
getgroups(ngids, orig_gids);
#ifdef HAVE_AIX
- setpwent(&fp);
+ setpwent_r(&fp);
while (!getpwent_r(&pw, buf, BUF_SIZE, &fp)) {
pwd = &pw;
#else

0 comments on commit 33049ca

Please sign in to comment.
Something went wrong with that request. Please try again.