Skip to content

Commit

Permalink
Merge 1ac36af into 7dc328b
Browse files Browse the repository at this point in the history
  • Loading branch information
bashtage committed Jul 5, 2019
2 parents 7dc328b + 1ac36af commit 0623a0b
Show file tree
Hide file tree
Showing 22 changed files with 382 additions and 317 deletions.
4 changes: 4 additions & 0 deletions randomgen/chacha.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ from randomgen.common cimport *

cdef extern from "src/chacha/chacha.h":

int RANDOMGEN_USE_SIMD

struct CHACHA_STATE_T:
uint32_t block[16]
uint32_t keysetup[8]
Expand All @@ -16,6 +18,8 @@ cdef extern from "src/chacha/chacha.h":

void chacha_seed(chacha_state_t *state, uint64_t *seedval, uint64_t *stream, uint64_t *ctr)
void chacha_advance(chacha_state_t *state, uint64_t *delta)
int chacha_simd_capable()
void chacha_use_simd(int value)

cdef class ChaCha(BitGenerator):
cdef chacha_state_t *rng_state
Expand Down
31 changes: 31 additions & 0 deletions randomgen/chacha.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,37 @@ cdef class ChaCha(BitGenerator):
state = self.seed_seq.generate_state(4, np.uint64)
self.seed(key=state, counter=counter)

@property
def use_simd(self):
    """
    Toggle use of SIMD

    Parameters
    ----------
    flag : bool
        Flag indicating whether to use SIMD

    Returns
    -------
    flag : bool
        Current flag value

    Raises
    ------
    ValueError
        If SIMD is not supported

    Notes
    -----
    The value is read from the C-level variable ``RANDOMGEN_USE_SIMD``,
    which is a single global rather than part of this instance's state.
    """
    # RANDOMGEN_USE_SIMD is a C int; convert it so the documented bool
    # is what callers actually receive.
    return bool(RANDOMGEN_USE_SIMD)

@use_simd.setter
def use_simd(self, value):
    # Always probe the CPU first: chacha_simd_capable() reports whether the
    # SIMD code path can run on this machine.
    simd_supported = chacha_simd_capable()
    if value and not simd_supported:
        # Refuse to enable a code path the hardware cannot execute.
        raise ValueError('CPU does not support SIMD implementation')
    # Store the requested setting as a strict True/False.
    chacha_use_simd(bool(value))



def seed(self, seed=None, counter=None, key=None):
"""
seed(seed=None, counter=None, key=None)
Expand Down
2 changes: 1 addition & 1 deletion randomgen/generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3970,7 +3970,7 @@ cdef class Generator:
mnix = <int64_t*>np.PyArray_DATA(mnarr)
sz = np.PyArray_SIZE(mnarr)
ni = n
check_constraint(ni, 'n', CONS_NON_NEGATIVE)
check_constraint(<double>ni, 'n', CONS_NON_NEGATIVE)
offset = 0
with self.lock, nogil:
for i in range(sz // d):
Expand Down
2 changes: 1 addition & 1 deletion randomgen/mtrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3850,7 +3850,7 @@ cdef class RandomState:
mnix = <long*>np.PyArray_DATA(mnarr)
sz = np.PyArray_SIZE(mnarr)
ni = <long>n
check_constraint(ni, 'n', CONS_NON_NEGATIVE)
check_constraint(<double>ni, 'n', CONS_NON_NEGATIVE)
offset = 0
with self.lock, nogil:
for i in range(sz // d):
Expand Down
3 changes: 2 additions & 1 deletion randomgen/src/aesctr/aesctr.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ int aes_capable(void)
{
#if defined(__AES__) && __AES__
int flags[32];
feature_flags(flags);
feature_flags(flags, RANDOMGEN_ECX);
RANDOMGEN_USE_AESNI = flags[AES_FEATURE_FLAG];
return RANDOMGEN_USE_AESNI;
#else
RANDOMGEN_USE_AESNI = 0;
return 0;
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion randomgen/src/aesctr/softaes.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#ifndef _RANDOMGEN_TINY_AES_H_
#define _RANDOMGEN_TINY_AES_H_

#include <inttypes.h>
#include "../common/randomgen_config.h"

#if !defined(INLINE)
#define INLINE inline
Expand Down
86 changes: 56 additions & 30 deletions randomgen/src/chacha/chacha.c
Original file line number Diff line number Diff line change
@@ -1,42 +1,68 @@
#include "chacha.h"
#include <stdio.h>
#include "../common/cpu_features.h"

/*
 * Global switch selecting the ChaCha core implementation: generate_block()
 * uses the SIMD core when this is > 0. It is written by
 * chacha_simd_capable() and chacha_use_simd(), and being a single global it
 * is shared by every chacha_state_t in the process.
 */
int RANDOMGEN_USE_SIMD;

/*
 * Declarations for the inline generator functions defined in chacha.h.
 * NOTE(review): chacha.h shows chacha_next32 and chacha_next_double as
 * `static INLINE`; confirm whether these extern declarations are still
 * needed to emit out-of-line symbols.
 */
extern INLINE uint32_t chacha_next32(chacha_state_t *state);

extern INLINE uint64_t chacha_next64(chacha_state_t *state);

extern INLINE double chacha_next_double(chacha_state_t *state);

void chacha_seed(chacha_state_t *state, uint64_t *seedval, uint64_t *stream, uint64_t *ctr) {
// Using a 128-bit seed.
state->keysetup[0] = seedval[0] & 0xffffffffu;
state->keysetup[1] = seedval[0] >> 32;
state->keysetup[2] = seedval[1] & 0xffffffffu;
state->keysetup[3] = seedval[1] >> 32;
// Using a 128-bit stream.
state->keysetup[4] = stream[0] & 0xffffffffu;
state->keysetup[5] = stream[0] >> 32;
state->keysetup[6] = stream[1] & 0xffffffffu;
state->keysetup[7] = stream[1] >> 32;

/* Ensure ctr[0] is at a node where a block would be generated */
state->ctr[0] = ((ctr[0] >> 4) << 4);
state->ctr[1] = ctr[1];
generate_block(state);
/* Store correct value of counter */
state->ctr[0] = ctr[0];
/*
 * Choose which CPUID leaf-1 feature bit chacha_simd_capable() tests at
 * runtime, matching the instruction set this file was compiled for:
 *   - built with SSSE3: ECX bit 9  (SSSE3 support)
 *   - built with SSE2 only: EDX bit 26 (SSE2 support)
 * Without SSE2 no register is selected; CHACHA_FEATURE_REG is left undefined
 * because chacha_simd_capable() only references it inside its own SSE2 guard.
 */
#if defined(__SSE2__) && __SSE2__
#if defined(__SSSE3__) && __SSSE3__
#define CHACHA_FEATURE_REG RANDOMGEN_ECX
#define CHACHA_FEATURE_FLAG 9
#else
#define CHACHA_FEATURE_REG RANDOMGEN_EDX
#define CHACHA_FEATURE_FLAG 26
#endif
#else
#define CHACHA_FEATURE_FLAG 0
#endif

/*
 * Detect whether the SIMD ChaCha core can be used on this CPU.
 *
 * Side effect: the result is also stored in the global RANDOMGEN_USE_SIMD,
 * so calling this function overwrites any value previously set through
 * chacha_use_simd().
 *
 * Returns the value of the selected CPUID feature bit (0 or 1) when the file
 * was compiled with SSE2 enabled, and 0 otherwise.
 */
int chacha_simd_capable(void) {
  int capable = 0;
#if defined(__SSE2__) && __SSE2__
  int cpu_bits[32];
  /* One entry per bit of the register chosen by CHACHA_FEATURE_REG. */
  feature_flags(cpu_bits, CHACHA_FEATURE_REG);
  capable = cpu_bits[CHACHA_FEATURE_FLAG];
#endif
  RANDOMGEN_USE_SIMD = capable;
  return capable;
}

/*
 * Set the global SIMD switch to `flag` without checking CPU support; callers
 * that need validation should consult chacha_simd_capable() first.
 */
void chacha_use_simd(int flag) {
  RANDOMGEN_USE_SIMD = flag;
}

/*
 * Initialise a ChaCha state from a 128-bit seed, a 128-bit stream id and a
 * 128-bit counter, and pre-compute the block containing that counter.
 *
 * state   - generator state to initialise
 * seedval - two 64-bit words forming the seed (keysetup[0..3])
 * stream  - two 64-bit words forming the stream id (keysetup[4..7])
 * ctr     - two 64-bit words forming the initial counter
 */
void chacha_seed(chacha_state_t *state, uint64_t *seedval, uint64_t *stream,
                 uint64_t *ctr) {
  int word;

  /* Re-runs CPU detection, which resets the global RANDOMGEN_USE_SIMD to the
   * detected capability and so replaces any value set via chacha_use_simd(). */
  chacha_simd_capable();

  /* Split each 64-bit input into a low and a high 32-bit key word. */
  for (word = 0; word < 2; ++word) {
    state->keysetup[2 * word] = seedval[word] & 0xffffffffu;
    state->keysetup[2 * word + 1] = seedval[word] >> 32;
    state->keysetup[2 * word + 4] = stream[word] & 0xffffffffu;
    state->keysetup[2 * word + 5] = stream[word] >> 32;
  }

  /* Round ctr[0] down to a multiple of 16, i.e. the position at which a block
   * would be generated, and build that block. */
  state->ctr[1] = ctr[1];
  state->ctr[0] = ctr[0] & ~(uint64_t)0xf;
  generate_block(state);

  /* Restore the exact counter so output resumes mid-block if required. */
  state->ctr[0] = ctr[0];
}

/*
 * Advance the 128-bit counter of `state` by the 128-bit value `delta`
 * (delta[0] = low 64 bits, delta[1] = high 64 bits), regenerating the cached
 * block when the counter lands inside a different block.
 *
 * No block is generated when the new counter is a multiple of 16, because
 * chacha_next32() generates the block itself when it sees index 0.
 */
void chacha_advance(chacha_state_t *state, uint64_t *delta) {
  const uint64_t orig = state->ctr[0];
  const int idx = (int)(orig % 16);
  int carry;

  state->ctr[0] += delta[0];
  /* Unsigned wrap-around of the low word carries into the high word. */
  carry = state->ctr[0] < orig;
  state->ctr[1] += (delta[1] + carry);

  /* "idx + delta[0] >= 16" written so it cannot overflow: idx is in [0, 15],
   * so 16 - idx is in [1, 16]. The additive form wraps for
   * delta[0] > 2^64 - 16 and would then skip a required regeneration. */
  if ((delta[0] >= (uint64_t)(16 - idx) || delta[1]) &&
      ((state->ctr[0] % 16) != 0)) {
    generate_block(state);
  }
}
28 changes: 19 additions & 9 deletions randomgen/src/chacha/chacha.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#define M128I_CAST (__m128i)
#endif

extern int RANDOMGEN_USE_SIMD;


typedef double * aligned_double_ptr ;

ALIGN_WINDOWS struct CHACHA_STATE_T {
Expand All @@ -42,11 +45,10 @@ ALIGN_WINDOWS struct CHACHA_STATE_T {
typedef struct CHACHA_STATE_T chacha_state_t;


#ifdef __SSE2__

#if defined(__SSE2__) && __SSE2__
// Get an efficient _mm_roti_epi32 based on enabled features.
#if !defined(__XOP__)
#if defined(__SSSE3__)
#if defined(__SSSE3__) && __SSSE3__
#define _mm_roti_epi32(r, c) ( \
((c) == 8) ? \
_mm_shuffle_epi8((r), _mm_set_epi8(14, 13, 12, 15, \
Expand Down Expand Up @@ -75,8 +77,7 @@ typedef struct CHACHA_STATE_T chacha_state_t;
#include <xopintrin.h>
#endif


static INLINE void chacha_core(chacha_state_t *state) {
static INLINE void chacha_core_ssse3(chacha_state_t *state) {
// ROTVn rotates the elements in the given vector n places to the left.
int i;

Expand Down Expand Up @@ -134,7 +135,7 @@ static INLINE void chacha_core(chacha_state_t *state) {
#undef CHACHA_ROTV2
#undef CHACHA_ROTV1
}
#else
#endif

static INLINE void chacha_core(chacha_state_t *state) {
int i;
Expand All @@ -160,7 +161,6 @@ static INLINE void chacha_core(chacha_state_t *state) {
#undef CHACHA_QUARTERROUND
#undef CHACHA_ROTL32
}
#endif

static INLINE void generate_block(chacha_state_t *state) {
int i;
Expand All @@ -177,13 +177,21 @@ static INLINE void generate_block(chacha_state_t *state) {
input[15] = (state->ctr[1] / 16) >> 32;

for (i = 0; i < 16; ++i) state->block[i] = input[i];
chacha_core(state);
#if defined(__SSE2__) && __SSE2__
if LIKELY(RANDOMGEN_USE_SIMD > 0) {
chacha_core_ssse3(state);
} else {
#endif
chacha_core(state);
#if defined(__SSE2__) && __SSE2__
}
#endif
for (i = 0; i < 16; ++i) state->block[i] += input[i];
}

static INLINE uint32_t chacha_next32(chacha_state_t *state){
int idx = state->ctr[0] % 16;
if (idx == 0) generate_block(state);
if UNLIKELY(idx == 0) generate_block(state);
++state->ctr[0];
if (state->ctr[0] == 0) ++state->ctr[1];

Expand All @@ -199,6 +207,8 @@ static INLINE double chacha_next_double(chacha_state_t *state){
return (chacha_next64(state) >> 11) * (1.0/9007199254740992.0);
}

void chacha_use_simd(int flag);
int chacha_simd_capable(void);
void chacha_seed(chacha_state_t *state, uint64_t *seedval, uint64_t *stream, uint64_t *ctr);
void chacha_advance(chacha_state_t *state, uint64_t *delta);

Expand Down
36 changes: 28 additions & 8 deletions randomgen/src/common/cpu_features.c
Original file line number Diff line number Diff line change
@@ -1,34 +1,54 @@
#include "cpu_features.h"

/*
 * Expand one register of CPUID leaf 1 into an array of 32 bit flags.
 *
 * flags - output; flags[i] receives bit i (0 or 1) of the chosen register
 * major - register selector: 0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX
 *         (the RANDOMGEN_E?X constants); any other value yields all zeros
 *
 * When CPUID is unavailable (HAVE_CPUID not set) or leaf 1 is not supported,
 * every flag is 0.
 */
void feature_flags(int flags[32], int major)
{
    /* regs[] holds EAX, EBX, ECX, EDX in selector order. */
    uint32_t regs[4] = {0, 0, 0, 0};
    uint32_t reg = 0;
    int i;
#if defined(HAVE_CPUID) && HAVE_CPUID
#if defined(__clang__) || defined(__GNUC__)
    uint32_t signature = 0;
    /* Leaf 0 reports the highest supported basic leaf. */
    if (__get_cpuid_max(0, &signature) >= 1)
    {
        __get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
    }
#elif defined(_MSC_VER) && defined(_WIN32)
    int cpu_info[4] = {0};
    __cpuid(cpu_info, 0);
    if (cpu_info[0] >= 1)
    {
        __cpuidex(cpu_info, 1, 0);
        /* Copy to unsigned storage so the shifts below are well defined. */
        for (i = 0; i < 4; i++)
        {
            regs[i] = (uint32_t)cpu_info[i];
        }
    }
#endif
#endif
    if (major >= 0 && major < 4)
    {
        reg = regs[major];
    }
    for (i = 0; i < 32; i++)
    {
        flags[i] = (int)((reg >> i) & 0x1u);
    }
}
9 changes: 7 additions & 2 deletions randomgen/src/common/cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@

#include "randomgen_config.h"

#define RANDOMGEN_EAX 0
#define RANDOMGEN_EBX 1
#define RANDOMGEN_ECX 2
#define RANDOMGEN_EDX 3

#undef HAVE_CPUID
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86)
#if defined(_MSC_VER) && defined(_WIN32)
#if _MSC_VER >= 1900
#if _MSC_VER >= 1500
#define HAVE_CPUID 1
#endif
#else
Expand All @@ -16,6 +21,6 @@
#endif
#endif

void feature_flags(int flags[]);
void feature_flags(int flags[32], int major);

#endif /* _RANDOMGEN_CPU_FEATURES_H */
2 changes: 2 additions & 0 deletions randomgen/src/common/randomgen_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@

/*
 * Branch-prediction hints. Both macros expand to a fully parenthesised
 * expression so they can be written directly after `if`.
 *
 * The builtin is selected by compiler rather than by operating system:
 * GCC and clang provide __builtin_expect on every platform (including
 * Windows), while other compilers fall back to the plain condition.
 */
#if defined(__GNUC__) || defined(__clang__)
#define UNLIKELY(x) (__builtin_expect((x), 0))
#define LIKELY(x) (__builtin_expect((x), 1))
#else
#define UNLIKELY(x) ((x))
#define LIKELY(x) ((x))
#endif

#endif

0 comments on commit 0623a0b

Please sign in to comment.