Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
290 lines (241 sloc)
7.63 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
#define NEW_SIMD_CODE | |
#ifdef KERNEL_STATIC | |
#include M2S(INCLUDE_PATH/inc_vendor.h) | |
#include M2S(INCLUDE_PATH/inc_types.h) | |
#include M2S(INCLUDE_PATH/inc_platform.cl) | |
#include M2S(INCLUDE_PATH/inc_common.cl) | |
#include M2S(INCLUDE_PATH/inc_simd.cl) | |
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl) | |
#endif | |
#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) | |
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) | |
typedef struct pbkdf2_sha256_tmp | |
{ | |
u32 ipad[8]; | |
u32 opad[8]; | |
u32 dgst[32]; | |
u32 out[32]; | |
} pbkdf2_sha256_tmp_t; | |
typedef struct pbkdf2_sha256 | |
{ | |
u32 salt_buf[64]; | |
} pbkdf2_sha256_t; | |
/**
 * Runs two chained SHA-256 block transforms on vector data.
 *
 * Pass 1 starts from the ipad state and transforms the caller's block
 * (w0..w3). Pass 2 starts from the opad state and transforms a block built
 * from the eight state words pass 1 left in digest.
 *
 * w0..w3 : in  - 16-word block for the first transform
 *          out - overwritten with the block assembled for the second transform
 * ipad   : 8-word starting state for pass 1 (only read here)
 * opad   : 8-word starting state for pass 2 (only read here)
 * digest : out - 8-word state after the second transform
 */
DECLSPEC void hmac_sha256_run_V (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *ipad, PRIVATE_AS u32x *opad, PRIVATE_AS u32x *digest)
{
  /* pass 1: seed the working state from ipad */

  for (u32 k = 0; k < 8; k++)
  {
    digest[k] = ipad[k];
  }

  sha256_transform_vector (w0, w1, w2, w3, digest);

  /* pass 2 block, words 0..7: the state produced by pass 1 */

  for (u32 k = 0; k < 4; k++)
  {
    w0[k] = digest[k];
  }

  for (u32 k = 0; k < 4; k++)
  {
    w1[k] = digest[k + 4];
  }

  /* words 8..15: 0x80000000 marker, zero fill, then (64 + 32) * 8 in the last word */

  w2[0] = 0x80000000;

  for (u32 k = 1; k < 4; k++)
  {
    w2[k] = 0;
  }

  for (u32 k = 0; k < 3; k++)
  {
    w3[k] = 0;
  }

  w3[3] = (64 + 32) * 8;

  /* pass 2: seed the working state from opad */

  for (u32 k = 0; k < 8; k++)
  {
    digest[k] = opad[k];
  }

  sha256_transform_vector (w0, w1, w2, w3, digest);
}
/**
 * Init kernel: one work-item per password candidate.
 *
 * Keys an HMAC-SHA256 context with the candidate, stores the resulting
 * ipad/opad states in tmps[gid], then feeds the salt plus a 4-byte counter
 * through the HMAC and stores the result as both the first dgst value and
 * the initial out accumulator.
 */
KERNEL_FQ void m10900_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /* key the HMAC with the candidate password */

  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len);

  /* keep the keyed states so the loop kernel can start from them */

  for (u32 k = 0; k < 8; k++)
  {
    tmps[gid].ipad[k] = sha256_hmac_ctx.ipad.h[k];
  }

  for (u32 k = 0; k < 8; k++)
  {
    tmps[gid].opad[k] = sha256_hmac_ctx.opad.h[k];
  }

  /* absorb the salt once; each pass below works on a copy of this context */

  sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len);

  /* base advances by 8 words per pass, counter by 1; with the bound of 8 this runs exactly once */

  for (u32 base = 0, counter = 1; base < 8; base += 8, counter += 1)
  {
    sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx;

    /* 64-byte block whose first word is the counter; only 4 bytes of it are consumed below */

    u32 w0[4] = { counter, 0, 0, 0 };
    u32 w1[4] = { 0, 0, 0, 0 };
    u32 w2[4] = { 0, 0, 0, 0 };
    u32 w3[4] = { 0, 0, 0, 0 };

    sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4);

    sha256_hmac_final (&sha256_hmac_ctx2);

    /* the finished HMAC value is read from opad.h */

    for (u32 k = 0; k < 8; k++)
    {
      tmps[gid].dgst[base + k] = sha256_hmac_ctx2.opad.h[k];
    }

    /* the accumulator starts out equal to the first HMAC value */

    for (u32 k = 0; k < 8; k++)
    {
      tmps[gid].out[base + k] = tmps[gid].dgst[base + k];
    }
  }
}
/**
 * Loop kernel: each work-item handles VECT_SIZE candidates at once.
 *
 * Loads the stored ipad/opad states plus the current dgst/out words from
 * tmps, runs LOOP_CNT chained HMAC steps (each step feeds the previous dgst
 * back in and XORs the new dgst into out), then writes dgst/out back.
 */
KERNEL_FQ void m10900_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_t))
{
  const u64 gid = get_global_id (0);

  if ((gid * VECT_SIZE) >= GID_CNT) return;

  /* keyed HMAC states; the second packv argument names the tmps field, not the local array */

  u32x ipad[8];
  u32x opad[8];

  for (u32 k = 0; k < 8; k++)
  {
    ipad[k] = packv (tmps, ipad, gid, k);
  }

  for (u32 k = 0; k < 8; k++)
  {
    opad[k] = packv (tmps, opad, gid, k);
  }

  /* base steps through dgst/out in groups of 8 words; with the bound of 8 this runs exactly once */

  for (u32 base = 0; base < 8; base += 8)
  {
    u32x dgst[8];
    u32x out[8];

    for (u32 k = 0; k < 8; k++)
    {
      dgst[k] = packv (tmps, dgst, gid, base + k);
    }

    for (u32 k = 0; k < 8; k++)
    {
      out[k] = packv (tmps, out, gid, base + k);
    }

    for (u32 iter = 0; iter < LOOP_CNT; iter++)
    {
      u32x w0[4];
      u32x w1[4];
      u32x w2[4];
      u32x w3[4];

      /* block words 0..7: the previous dgst */

      for (u32 k = 0; k < 4; k++)
      {
        w0[k] = dgst[k];
      }

      for (u32 k = 0; k < 4; k++)
      {
        w1[k] = dgst[k + 4];
      }

      /* block words 8..15: 0x80000000 marker, zero fill, then (64 + 32) * 8 in the last word */

      w2[0] = 0x80000000;

      for (u32 k = 1; k < 4; k++)
      {
        w2[k] = 0;
      }

      for (u32 k = 0; k < 3; k++)
      {
        w3[k] = 0;
      }

      w3[3] = (64 + 32) * 8;

      hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst);

      /* fold the fresh dgst into the running accumulator */

      for (u32 k = 0; k < 8; k++)
      {
        out[k] ^= dgst[k];
      }
    }

    /* braces are required here: unpackv is a macro and may expand to more than one statement */

    for (u32 k = 0; k < 8; k++)
    {
      unpackv (tmps, dgst, gid, base + k, dgst[k]);
    }

    for (u32 k = 0; k < 8; k++)
    {
      unpackv (tmps, out, gid, base + k, out[k]);
    }
  }
}
/**
 * Compare kernel: one work-item per candidate.
 *
 * Picks four words of the accumulated output (selected by DGST_R0..DGST_R3)
 * and hands over to the shared multi-hash compare code pulled in via
 * COMPARE_M.
 */
KERNEL_FQ void m10900_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_t))
{
  /*
   * NOTE(review): gid, lid, r0..r3 and il_pos are presumably consumed by
   * name inside the included compare code - do not rename them.
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  const u32 r0 = tmps[gid].out[DGST_R0];
  const u32 r1 = tmps[gid].out[DGST_R1];
  const u32 r2 = tmps[gid].out[DGST_R2];
  const u32 r3 = tmps[gid].out[DGST_R3];

  /* il_pos is pinned to 0 for the compare code below */

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}