Skip to content

Commit

Permalink
Some cleanup/fixing in regards to lock->comm, plus cleanup and bug fi…
Browse files Browse the repository at this point in the history
…xing of a bogus unlock of &pid_lock that I introduced in August.
  • Loading branch information
Mike Miller committed Oct 27, 2022
1 parent 5ac825c commit f578a0a
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 67 deletions.
147 changes: 82 additions & 65 deletions util/sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,67 @@ static bool is_signal_pending(lock_t *lock) {
return pending;
}

void modify_critical_region_counter(struct task *task, int value, __attribute__((unused)) const char *file, __attribute__((unused)) int line) { // value Should only be -1 or 1. -mke
if(task == NULL) {
if(current != NULL) {
task = current;
} else {
return;
}
} else if(task->exiting) { // Don't mess with tasks that are exiting. -mke
return;
}

pthread_mutex_lock(&task->critical_region.lock);

if(!task->critical_region.count && (value < 0)) { // Prevent our unsigned value attempting to go negative. -mke
//int skipme = strcmp(task->comm, "init");
//if((task->pid > 2) && (skipme != 0)) // Why ask why? -mke
printk("ERROR: Attempt to decrement critical_region count when it is already zero, ignoring(%s:%d) (%s:%d)\n", task->comm, task->pid, file, line);
return;
}


if((strcmp(task->comm, "easter_egg") == 0) && ( !noprintk)) { // Extra logging for the some command
noprintk = 1; // Avoid recursive logging -mke
printk("INFO: MCRC(%d(%s):%s:%d:%d:%d)\n", task->pid, task->comm, file, line, value, task->critical_region.count + value);
noprintk = 0;
}

task->critical_region.count = task->critical_region.count + value;

pthread_mutex_unlock(&task->critical_region.lock);
}

void modify_critical_region_counter_wrapper(int value, __attribute__((unused)) const char *file, __attribute__((unused)) int line) { // sync.h can't know about the definition of task struct due to recursive include files. -mke
if(current != NULL)
modify_critical_region_counter(current, value, file, line);
return;
}

void modify_locks_held_count(struct task *task, int value) { // value Should only be -1 or 1. -mke
if((task == NULL) && (current != NULL)) {
task = current;
} else {
return;
}

pthread_mutex_lock(&task->locks_held.lock);
if(!task->locks_held.count && (value < 0)) { // Prevent our unsigned value attempting to go negative. -mke
if((task->pid > 2) && (!strcmp(task->comm, "init"))) // Why ask why? -mke
printk("ERROR: Attempt to decrement locks_held count when it is already zero, ignoring\n");
return;
}
task->locks_held.count = task->locks_held.count + value;
pthread_mutex_unlock(&task->locks_held.lock);
}

void modify_locks_held_count_wrapper(int value) { // sync.h can't know about the definition of struct due to recursive include files. -mke
if(current != NULL)
modify_locks_held_count(current, value);
return;
}

int wait_for(cond_t *cond, lock_t *lock, struct timespec *timeout) {
if (is_signal_pending(lock))
return _EINTR;
Expand All @@ -43,12 +104,16 @@ int wait_for(cond_t *cond, lock_t *lock, struct timespec *timeout) {
}

int wait_for_ignore_signals(cond_t *cond, lock_t *lock, struct timespec *timeout) {

if (current) {
lock(&current->waiting_cond_lock, 0);
current->waiting_cond = cond;
current->waiting_lock = lock;
unlock(&current->waiting_cond_lock);
}

modify_critical_region_counter(current, 1,__FILE__, __LINE__);
int savepid = current->pid;

int rc = 0;
#if LOCK_DEBUG
Expand All @@ -62,28 +127,36 @@ int wait_for_ignore_signals(cond_t *cond, lock_t *lock, struct timespec *timeout
goto SKIP;
}
AGAIN:
if(lock->pid == -1) { // Something has gone wrong. -mke
if((lock->pid == -1) && (lock->comm[0] == 0)) { // Something has gone wrong. -mke
lock(&current->waiting_cond_lock, 0);
current->waiting_cond = NULL;
current->waiting_lock = NULL;
unlock(&current->waiting_cond_lock);
//return _ETIMEDOUT;
printk("ERROR: Locking PID is gone in wait_for_ignore_signals() (%s:%d). Attempting recovery", current->comm, current->pid);
printk("ERROR: Locking PID is gone in wait_for_ignore_signals() (%s:%d). Attempting recovery\n", current->comm, current->pid);
unlock(lock);
modify_critical_region_counter(current, -1,__FILE__, __LINE__);
return 0;
// Weird
}
struct timespec trigger_time;
clock_gettime(CLOCK_MONOTONIC, &trigger_time);
trigger_time.tv_sec = trigger_time.tv_sec + 6;
trigger_time.tv_nsec = 0;
if((lock->pid == -1) && (lock->comm[0] == 0)) { // Something has gone wrong. -mke
goto AGAIN; // Ugh. -mke
}
rc = pthread_cond_timedwait_relative_np(&cond->cond, &lock->m, &trigger_time);
if(rc == ETIMEDOUT) {
attempts++;
if(attempts <= 6) // We are likely deadlocked if more than ten attempts -mke
goto AGAIN;
printk("ERROR: Deadlock in wait_for_ignore_signals() (%s:%d). Attempting recovery", current->comm, current->pid);
//return _ETIMEDOUT;
printk("ERROR: Deadlock in wait_for_ignore_signals() (%s:%d). Attempting recovery\n", current->comm, current->pid);
unlock(lock);
modify_critical_region_counter(current, -1,__FILE__, __LINE__);
return 0;
} else if(rc == 0) {
goto SKIP;
}
} else {
#if __linux__
Expand Down Expand Up @@ -114,8 +187,12 @@ int wait_for_ignore_signals(cond_t *cond, lock_t *lock, struct timespec *timeout
current->waiting_lock = NULL;
unlock(&current->waiting_cond_lock);
}
if (rc == ETIMEDOUT)
if (rc == ETIMEDOUT) {
modify_critical_region_counter(current, -1,__FILE__, __LINE__);
return _ETIMEDOUT;
}

modify_critical_region_counter(current, -1,__FILE__, __LINE__);
return 0;
}

Expand Down Expand Up @@ -170,66 +247,6 @@ unsigned locks_held_count_wrapper() { // sync.h can't know about the definition
return 0;
}

void modify_critical_region_counter(struct task *task, int value, __attribute__((unused)) const char *file, __attribute__((unused)) int line) { // value Should only be -1 or 1. -mke
if(task == NULL) {
if(current != NULL) {
task = current;
} else {
return;
}
} else if(task->exiting) { // Don't mess with tasks that are exiting. -mke
return;
}

pthread_mutex_lock(&task->critical_region.lock);

if(!task->critical_region.count && (value < 0)) { // Prevent our unsigned value attempting to go negative. -mke
//int skipme = strcmp(task->comm, "init");
//if((task->pid > 2) && (skipme != 0)) // Why ask why? -mke
printk("ERROR: Attempt to decrement critical_region count when it is already zero, ignoring(%s:%d) (%s:%d)\n", task->comm, task->pid, file, line);
return;
}


if((strcmp(task->comm, "easter_egg") == 0) && ( !noprintk)) { // Extra logging for the some command
noprintk = 1; // Avoid recursive logging -mke
printk("INFO: MCRC(%d(%s):%s:%d:%d:%d)\n", task->pid, task->comm, file, line, value, task->critical_region.count + value);
noprintk = 0;
}

task->critical_region.count = task->critical_region.count + value;

pthread_mutex_unlock(&task->critical_region.lock);
}

void modify_critical_region_counter_wrapper(int value, __attribute__((unused)) const char *file, __attribute__((unused)) int line) { // sync.h can't know about the definition of task struct due to recursive include files. -mke
if(current != NULL)
modify_critical_region_counter(current, value, file, line);
return;
}

void modify_locks_held_count(struct task *task, int value) { // value Should only be -1 or 1. -mke
if((task == NULL) && (current != NULL)) {
task = current;
} else {
return;
}

pthread_mutex_lock(&task->locks_held.lock);
if(!task->locks_held.count && (value < 0)) { // Prevent our unsigned value attempting to go negative. -mke
if((task->pid > 2) && (!strcmp(task->comm, "init"))) // Why ask why? -mke
printk("ERROR: Attempt to decrement locks_held count when it is already zero, ignoring\n");
return;
}
task->locks_held.count = task->locks_held.count + value;
pthread_mutex_unlock(&task->locks_held.lock);
}

void modify_locks_held_count_wrapper(int value) { // sync.h can't know about the definition of struct due to recursive include files. -mke
if(current != NULL)
modify_locks_held_count(current, value);
return;
}

// This is how you would mitigate the unlock/wait race if the wait
// is async signal safe. wait_for *should* be safe from this race
Expand Down
5 changes: 3 additions & 2 deletions util/sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ static inline void unlock(lock_t *lock) {
lock->owner = zero_init(pthread_t);
pthread_mutex_unlock(&lock->m);
lock->pid = -1; //
lock->comm[0] = 0;
modify_locks_held_count_wrapper(-1);
//modify_critical_region_counter_wrapper(-1, __FILE__, __LINE__);

Expand Down Expand Up @@ -322,7 +323,7 @@ static inline void _read_unlock(wrlock_t *lock, __attribute__((unused)) const ch
printk("ERROR: read_unlock(%x) error(PID: %d Process: %s count %d) (%s:%d)\n",lock, current_pid(), current_comm(), lock->val, file, line);
lock->val = 0;
lock->pid = -1;
strncpy(lock->comm, "", 1);
lock->comm[0] = 0;
modify_locks_held_count_wrapper(-1);
//modify_critical_region_counter_wrapper(-1, __FILE__, __LINE__);
//STRACE("read_unlock(%x, %s(%d), %s, %d\n", lock, lock->comm, lock->pid, file, line);
Expand Down Expand Up @@ -362,7 +363,7 @@ static inline void _write_unlock(wrlock_t *lock, __attribute__((unused)) const c
//assert(lock->val == -1);
lock->val = lock->line = lock->pid = 0;
lock->pid = -1;
strncpy(lock->comm, "", 1);
lock->comm[0] = 0;
//STRACE("write_unlock(%x, %s(%d), %s, %d\n", lock, lock->comm, lock->pid, file, line);
lock->file = NULL;
modify_locks_held_count_wrapper(-1);
Expand Down

0 comments on commit f578a0a

Please sign in to comment.