Skip to content

Commit

Permalink
More tweaks to reduce the odds of processes hanging. Also disabled ss…
Browse files Browse the repository at this point in the history
…e, as

it was causing issues with ssl on Alpine.
  • Loading branch information
Mike Miller committed Oct 27, 2022
1 parent 062067f commit 5ac825c
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 51 deletions.
2 changes: 1 addition & 1 deletion emu/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ static inline void do_cpuid(dword_t *eax, dword_t *ebx, dword_t *ecx, dword_t *e
*edx = (1 << 0) // fpu
| (1 << 15) // cmov
| (1 << 23) // mmx
| (1 << 25) // sse
// | (1 << 25) // sse
| (1 << 26) // sse2
;
break;
Expand Down
47 changes: 2 additions & 45 deletions emu/vec.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif
#include <math.h>
#include <string.h>

Expand Down Expand Up @@ -193,15 +190,8 @@ void vec_imm_shiftrs_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
}

void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
uint8x16_t neon_dst = vld1q_u8(dst->u8);
uint8x16_t neon_src = vld1q_u8(src->u8);
uint8x16_t neon_res = vaddq_u8(neon_dst, neon_src);
vst1q_u8(dst->u8, neon_res);
#else
for (unsigned i = 0; i < array_size(src->u8); i++)
dst->u8[i] += src->u8[i];
#endif
}
void vec_add_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u16); i++)
Expand Down Expand Up @@ -332,16 +322,9 @@ void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
}

void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
uint8x16_t neon_dst = vld1q_u8(dst->u8);
uint8x16_t neon_src = vld1q_u8(src->u8);
uint8x16_t neon_res = vminq_u8(neon_dst, neon_src);
vst1q_u8(dst->u8, neon_res);
#else
for (unsigned i = 0; i < array_size(src->u8); i++)
if (src->u8[i] < dst->u8[i])
dst->u8[i] = src->u8[i];
#endif
}
void vec_max_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u8); i++)
Expand All @@ -359,7 +342,7 @@ void vec_maxs_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
}

static bool cmpd(double a, double b, int type) {
bool res = false;
bool res;
switch (type % 4) {
case 0: res = a == b; break;
case 1: res = a < b; break;
Expand All @@ -370,7 +353,7 @@ static bool cmpd(double a, double b, int type) {
return res;
}
static bool cmps(float a, float b, int type) {
bool res = false;
bool res;
switch (type % 4) {
case 0: res = a == b; break;
case 1: res = a < b; break;
Expand Down Expand Up @@ -602,15 +585,8 @@ void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint
}

void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
#ifdef __ARM_NEON__
uint8x16_t neon_dst = vld1q_u8(dst->u8);
uint8x16_t neon_src = vld1q_u8(src->u8);
uint8x16_t neon_res = vceqq_u8(neon_dst, neon_src);
vst1q_u8(dst->u8, neon_res);
#else
for (unsigned i = 0; i < array_size(src->u8); i++)
dst->u8[i] = dst->u8[i] == src->u8[i] ? ~0 : 0;
#endif
}
void vec_compare_eqw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u16); i++)
Expand All @@ -621,16 +597,6 @@ void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[i] = dst->u32[i] == src->u32[i] ? ~0 : 0;
}

/*
* Neon algo: (only one part (64bits) is demonstrated, algo works the same for another part)
* z - is a bit which forms the mask, X - is not interesting bit.
* neon_src: zXXXXXXXzXXXXXXXzXXXXXXXzXXXXXXXzXXXXXXXzXXXXXXXzXXXXXXXzXXXXXXX...
* step1: 0000000z0000000z0000000z0000000z0000000z0000000z0000000z0000000z...
* step2: 00000000000000zz00000000000000zz00000000000000zz00000000000000zz...
* step3: 0000000000000000000000000000zzzz0000000000000000000000000000zzzz...
* step4: 00000000000000000000000000000000000000000000000000000000zzzzzzzz...
* After step4, 8 bits at the end of each 64bit lane are loaded into dst.
*/
void vec_compares_gtb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u8); i++)
dst->u8[i] = (int8_t)dst->u8[i] > (int8_t)src->u8[i] ? ~0 : 0;
Expand All @@ -646,19 +612,10 @@ void vec_compares_gtd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {

void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
*dst = 0;
#if defined(__ARM_NEON__) && defined(__LITTLE_ENDIAN__)
uint8x16_t neon_src = vld1q_u8(src->u8);
uint16x8_t step1 = vshrq_n_u8(neon_src, 7);
uint32x4_t step2 = vsraq_n_u16(step1, step1, 7);
uint64x2_t step3 = vsraq_n_u32(step2, step2, 14);
uint16x8_t step4 = vsraq_n_u64(step3, step3, 28);
*dst |= (vgetq_lane_u8(step4, 8) << 8) | (vgetq_lane_u8(step4, 0));
#else
for (unsigned i = 0; i < array_size(src->u8); i++) {
if (src->u8[i] & (1 << 7))
*dst |= 1 << i;
}
#endif
}

void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
Expand Down
30 changes: 25 additions & 5 deletions util/sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,36 @@ int wait_for_ignore_signals(cond_t *cond, lock_t *lock, struct timespec *timeout
struct lock_debug lock_tmp = lock->debug;
lock->debug = (struct lock_debug) { .initialized = lock->debug.initialized };
#endif
unsigned attempts = 0;
if (!timeout) {
//rc = pthread_cond_timedwait_relative_np(&cond->cond, &lock->m, timeout);
if(current->pid <= 20) {
pthread_cond_wait(&cond->cond, &lock->m);
goto SKIP;
}
AGAIN:
if(lock->pid == -1) { // Something has gone wrong. -mke
lock(&current->waiting_cond_lock, 0);
current->waiting_cond = NULL;
current->waiting_lock = NULL;
unlock(&current->waiting_cond_lock);
return _ETIMEDOUT;
//return _ETIMEDOUT;
printk("ERROR: Locking PID is gone in wait_for_ignore_signals() (%s:%d). Attempting recovery", current->comm, current->pid);
return 0;
// Weird
}

pthread_cond_wait(&cond->cond, &lock->m);// Sometimes things get stuck here for some reason. -mke
struct timespec trigger_time;
clock_gettime(CLOCK_MONOTONIC, &trigger_time);
trigger_time.tv_sec = trigger_time.tv_sec + 6;
trigger_time.tv_nsec = 0;
rc = pthread_cond_timedwait_relative_np(&cond->cond, &lock->m, &trigger_time);
if(rc == ETIMEDOUT) {
attempts++;
if(attempts <= 6) // We are likely deadlocked if more than ten attempts -mke
goto AGAIN;
printk("ERROR: Deadlock in wait_for_ignore_signals() (%s:%d). Attempting recovery", current->comm, current->pid);
//return _ETIMEDOUT;
return 0;
}
} else {
#if __linux__
struct timespec abs_timeout;
Expand All @@ -84,6 +102,8 @@ int wait_for_ignore_signals(cond_t *cond, lock_t *lock, struct timespec *timeout
#error Unimplemented pthread_cond_wait relative timeout.
#endif
}

SKIP:
#if LOCK_DEBUG
lock->debug = lock_tmp;
#endif
Expand All @@ -109,7 +129,7 @@ void notify_once(cond_t *cond) {
__thread sigjmp_buf unwind_buf;
__thread bool should_unwind = false;

void sigusr1_handler() {
void sigusr1_handler(void) {
if (should_unwind) {
should_unwind = false;
siglongjmp(unwind_buf, 1);
Expand Down

0 comments on commit 5ac825c

Please sign in to comment.