diff --git a/configs/kernel-x86_64-debug-rhel.config b/configs/kernel-x86_64-debug-rhel.config index 936a8a88f0dea..a9072995d48e8 100644 --- a/configs/kernel-x86_64-debug-rhel.config +++ b/configs/kernel-x86_64-debug-rhel.config @@ -7194,3 +7194,14 @@ CONFIG_ZSWAP=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set + +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_FIPS_SIGNATURE_SELFTEST=y +CONFIG_FIPS_SIGNATURE_SELFTEST_RSA=y +CONFIG_FIPS_SIGNATURE_SELFTEST_ECDSA=y +CONFIG_CRYPTO_DRBG=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_FIPS_CUSTOM_VERSION=y +CONFIG_CRYPTO_FIPS_VERSION="rocky9.20250725" +CONFIG_CRYPTO_FIPS_NAME="Rocky Linux 9 Kernel Cryptographic API" diff --git a/configs/kernel-x86_64-rhel.config b/configs/kernel-x86_64-rhel.config index 9e3e16ae4db02..eb94c7c18f569 100644 --- a/configs/kernel-x86_64-rhel.config +++ b/configs/kernel-x86_64-rhel.config @@ -7171,3 +7171,14 @@ CONFIG_ZSWAP=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set + +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_FIPS_SIGNATURE_SELFTEST=y +CONFIG_FIPS_SIGNATURE_SELFTEST_RSA=y +CONFIG_FIPS_SIGNATURE_SELFTEST_ECDSA=y +CONFIG_CRYPTO_DRBG=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_FIPS_CUSTOM_VERSION=y +CONFIG_CRYPTO_FIPS_VERSION="rocky9.20250725" +CONFIG_CRYPTO_FIPS_NAME="Rocky Linux 9 Kernel Cryptographic API" diff --git a/crypto/Kconfig b/crypto/Kconfig index c0054b9f23cbb..c5ef8cac60efa 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -23,12 +23,12 @@ if CRYPTO comment "Crypto core or helper" config CRYPTO_FIPS - bool "FIPS 200 compliance" - depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS + bool "FIPS compliance" + depends on CRYPTO_DRBG=y && !CRYPTO_MANAGER_DISABLE_TESTS depends on (MODULE_SIG || !MODULES) help This option enables the fips boot option which is - required if you want the system to operate in a FIPS 200 + required if you want the system to operate in a FIPS certification. You should say no unless you know what this is. @@ -2019,6 +2019,7 @@ config CRYPTO_ANSI_CPRNG tristate "Pseudo Random Number Generation for Cryptographic modules" select CRYPTO_AES select CRYPTO_RNG + select CRYPTO_SHA3 help This option enables the generic pseudo random number generator for cryptographic modules. Uses the Algorithm specified in diff --git a/crypto/aead.c b/crypto/aead.c index 16991095270d2..c4ece86c45bc4 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -35,8 +35,7 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/cipher.c b/crypto/cipher.c index b47141ed4a9f3..395f0c2fbb9ff 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -34,8 +34,7 @@ static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/drbg.c b/crypto/drbg.c index accf425de57f7..d14cc09b5d399 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1283,6 +1283,12 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) if (ret < 0) goto err; + /* + * Align to at least a cache line for better performance. This also + * prevents false sharing of cache lines between different instances. + */ + ret = max(ret, L1_CACHE_BYTES - 1); + drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); if (!drbg->Vbuf) { ret = -ENOMEM; diff --git a/crypto/ecdh.c b/crypto/ecdh.c index fe8966511e9d7..85c64f1a40df2 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "ecc.h" struct ecdh_ctx { @@ -33,6 +34,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, params.key_size > sizeof(u64) * ctx->ndigits) return -EINVAL; + memset(ctx->private_key, 0, sizeof(ctx->private_key)); + if (!params.key || !params.key_size) return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); @@ -94,6 +97,36 @@ static int ecdh_compute_value(struct kpp_request *req) ctx->private_key, public_key); buf = public_key; nbytes = public_key_sz; + + /* + * SP800-56Arev3, 5.6.2.1.4: ("Owner Assurance of + * Pair-wise Consistency"): recompute the public key + * and check if the results match. + */ + if (fips_enabled) { + u64 *public_key_pct; + + if (ret < 0) + goto free_all; + + public_key_pct = kmalloc(public_key_sz, GFP_KERNEL); + if (!public_key_pct) { + ret = -ENOMEM; + goto free_all; + } + + ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits, + ctx->private_key, + public_key_pct); + if (ret < 0) { + kfree(public_key_pct); + goto free_all; + } + + if (memcmp(public_key, public_key_pct, public_key_sz)) + panic("ECDH PCT failed in FIPS mode"); + kfree(public_key_pct); + } } if (ret < 0) diff --git a/crypto/essiv.c b/crypto/essiv.c index 8bcc5bdcb2a95..ec81bdea25631 100644 --- a/crypto/essiv.c +++ b/crypto/essiv.c @@ -114,13 +114,16 @@ static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key, crypto_shash_update(desc, keys.enckey, keys.enckeylen) ?: crypto_shash_finup(desc, keys.authkey, keys.authkeylen, salt); if (err) - return err; + goto out; crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tctx->essiv_cipher, crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - return crypto_cipher_setkey(tctx->essiv_cipher, salt, - crypto_shash_digestsize(tctx->hash)); + err = crypto_cipher_setkey(tctx->essiv_cipher, salt, + crypto_shash_digestsize(tctx->hash)); +out: + memzero_explicit(&keys, sizeof(keys)); + return err; } static int essiv_aead_setauthsize(struct crypto_aead *tfm, diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index b9edfaa51b273..4b50cbc8a2faf 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -2,7 +2,7 @@ * Non-physical true random number generator based on timing jitter -- * Linux Kernel Crypto API specific code * - * Copyright Stephan Mueller , 2015 + * Copyright Stephan Mueller , 2015 - 2023 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,6 +37,8 @@ * DAMAGE. */ +#include +#include #include #include #include @@ -46,6 +48,8 @@ #include "jitterentropy.h" +#define JENT_CONDITIONING_HASH "sha3-256-generic" + /*************************************************************************** * Helper function ***************************************************************************/ @@ -60,11 +64,6 @@ void jent_zfree(void *ptr) kfree_sensitive(ptr); } -void jent_memcpy(void *dest, const void *src, unsigned int n) -{ - memcpy(dest, src, n); -} - /* * Obtain a high-resolution time stamp value. The time stamp is used to measure * the execution time of a given code path and its variations. Hence, the time @@ -91,6 +90,91 @@ void jent_get_nstime(__u64 *out) *out = tmp; } +int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, + unsigned int addtl_len, __u64 hash_loop_cnt, + unsigned int stuck) +{ + struct shash_desc *hash_state_desc = (struct shash_desc *)hash_state; + SHASH_DESC_ON_STACK(desc, hash_state_desc->tfm); + u8 intermediary[SHA3_256_DIGEST_SIZE]; + __u64 j = 0; + int ret; + + desc->tfm = hash_state_desc->tfm; + + if (sizeof(intermediary) != crypto_shash_digestsize(desc->tfm)) { + pr_warn_ratelimited("Unexpected digest size\n"); + return -EINVAL; + } + + /* + * This loop fills a buffer which is injected into the entropy pool. + * The main reason for this loop is to execute something over which we + * can perform a timing measurement. The injection of the resulting + * data into the pool is performed to ensure the result is used and + * the compiler cannot optimize the loop away in case the result is not + * used at all. Yet that data is considered "additional information" + * considering the terminology from SP800-90A without any entropy. + * + * Note, it does not matter which or how much data you inject, we are + * interested in one Keccack1600 compression operation performed with + * the crypto_shash_final. + */ + for (j = 0; j < hash_loop_cnt; j++) { + ret = crypto_shash_init(desc) ?: + crypto_shash_update(desc, intermediary, + sizeof(intermediary)) ?: + crypto_shash_finup(desc, addtl, addtl_len, intermediary); + if (ret) + goto err; + } + + /* + * Inject the data from the previous loop into the pool. This data is + * not considered to contain any entropy, but it stirs the pool a bit. + */ + ret = crypto_shash_update(desc, intermediary, sizeof(intermediary)); + if (ret) + goto err; + + /* + * Insert the time stamp into the hash context representing the pool. + * + * If the time stamp is stuck, do not finally insert the value into the + * entropy pool. Although this operation should not do any harm even + * when the time stamp has no entropy, SP800-90B requires that any + * conditioning operation to have an identical amount of input data + * according to section 3.1.5. + */ + if (!stuck) { + ret = crypto_shash_update(hash_state_desc, (u8 *)&time, + sizeof(__u64)); + } + +err: + shash_desc_zero(desc); + memzero_explicit(intermediary, sizeof(intermediary)); + + return ret; +} + +int jent_read_random_block(void *hash_state, char *dst, unsigned int dst_len) +{ + struct shash_desc *hash_state_desc = (struct shash_desc *)hash_state; + u8 jent_block[SHA3_256_DIGEST_SIZE]; + /* Obtain data from entropy pool and re-initialize it */ + int ret = crypto_shash_final(hash_state_desc, jent_block) ?: + crypto_shash_init(hash_state_desc) ?: + crypto_shash_update(hash_state_desc, jent_block, + sizeof(jent_block)); + + if (!ret && dst_len) + memcpy(dst, jent_block, dst_len); + + memzero_explicit(jent_block, sizeof(jent_block)); + return ret; +} + /*************************************************************************** * Kernel crypto API interface ***************************************************************************/ @@ -98,32 +182,82 @@ void jent_get_nstime(__u64 *out) struct jitterentropy { spinlock_t jent_lock; struct rand_data *entropy_collector; + struct crypto_shash *tfm; + struct shash_desc *sdesc; }; -static int jent_kcapi_init(struct crypto_tfm *tfm) +static void jent_kcapi_cleanup(struct crypto_tfm *tfm) { struct jitterentropy *rng = crypto_tfm_ctx(tfm); - int ret = 0; - rng->entropy_collector = jent_entropy_collector_alloc(1, 0); - if (!rng->entropy_collector) - ret = -ENOMEM; + spin_lock(&rng->jent_lock); - spin_lock_init(&rng->jent_lock); - return ret; -} + if (rng->sdesc) { + shash_desc_zero(rng->sdesc); + kfree(rng->sdesc); + } + rng->sdesc = NULL; -static void jent_kcapi_cleanup(struct crypto_tfm *tfm) -{ - struct jitterentropy *rng = crypto_tfm_ctx(tfm); + if (rng->tfm) + crypto_free_shash(rng->tfm); + rng->tfm = NULL; - spin_lock(&rng->jent_lock); if (rng->entropy_collector) jent_entropy_collector_free(rng->entropy_collector); rng->entropy_collector = NULL; spin_unlock(&rng->jent_lock); } +static int jent_kcapi_init(struct crypto_tfm *tfm) +{ + struct jitterentropy *rng = crypto_tfm_ctx(tfm); + struct crypto_shash *hash; + struct shash_desc *sdesc; + int size, ret = 0; + + spin_lock_init(&rng->jent_lock); + + /* + * Use SHA3-256 as conditioner. We allocate only the generic + * implementation as we are not interested in high-performance. The + * execution time of the SHA3 operation is measured and adds to the + * Jitter RNG's unpredictable behavior. If we have a slower hash + * implementation, the execution timing variations are larger. When + * using a fast implementation, we would need to call it more often + * as its variations are lower. + */ + hash = crypto_alloc_shash(JENT_CONDITIONING_HASH, 0, 0); + if (IS_ERR(hash)) { + pr_err("Cannot allocate conditioning digest\n"); + return PTR_ERR(hash); + } + rng->tfm = hash; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(hash); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) { + ret = -ENOMEM; + goto err; + } + + sdesc->tfm = hash; + crypto_shash_init(sdesc); + rng->sdesc = sdesc; + + rng->entropy_collector = jent_entropy_collector_alloc(1, 0, sdesc); + if (!rng->entropy_collector) { + ret = -ENOMEM; + goto err; + } + + spin_lock_init(&rng->jent_lock); + return 0; + +err: + jent_kcapi_cleanup(tfm); + return ret; +} + static int jent_kcapi_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) @@ -180,15 +314,24 @@ static struct rng_alg jent_alg = { .cra_module = THIS_MODULE, .cra_init = jent_kcapi_init, .cra_exit = jent_kcapi_cleanup, - } }; static int __init jent_mod_init(void) { + SHASH_DESC_ON_STACK(desc, tfm); + struct crypto_shash *tfm; int ret = 0; - ret = jent_entropy_init(); + tfm = crypto_alloc_shash(JENT_CONDITIONING_HASH, 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + desc->tfm = tfm; + crypto_shash_init(desc); + ret = jent_entropy_init(desc); + shash_desc_zero(desc); + crypto_free_shash(tfm); if (ret) { /* Handle permanent health test error */ if (fips_enabled) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 227cedfa4f0ae..5b224d3d7442e 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -2,7 +2,7 @@ * Non-physical true random number generator based on timing jitter -- * Jitter RNG standalone code. * - * Copyright Stephan Mueller , 2015 - 2020 + * Copyright Stephan Mueller , 2015 - 2023 * * Design * ====== @@ -57,21 +57,22 @@ typedef unsigned long long __u64; typedef long long __s64; typedef unsigned int __u32; +typedef unsigned char u8; #define NULL ((void *) 0) /* The entropy pool */ struct rand_data { + /* SHA3-256 is used as conditioner */ +#define DATA_SIZE_BITS 256 /* all data values that are vital to maintain the security * of the RNG are marked as SENSITIVE. A user must not * access that information while the RNG executes its loops to * calculate the next random value. */ - __u64 data; /* SENSITIVE Actual random number */ - __u64 old_data; /* SENSITIVE Previous random number */ - __u64 prev_time; /* SENSITIVE Previous time stamp */ -#define DATA_SIZE_BITS ((sizeof(__u64)) * 8) - __u64 last_delta; /* SENSITIVE stuck test */ - __s64 last_delta2; /* SENSITIVE stuck test */ - unsigned int osr; /* Oversample rate */ + void *hash_state; /* SENSITIVE hash state entropy pool */ + __u64 prev_time; /* SENSITIVE Previous time stamp */ + __u64 last_delta; /* SENSITIVE stuck test */ + __s64 last_delta2; /* SENSITIVE stuck test */ + unsigned int osr; /* Oversample rate */ #define JENT_MEMORY_BLOCKS 64 #define JENT_MEMORY_BLOCKSIZE 32 #define JENT_MEMORY_ACCESSLOOPS 128 @@ -301,15 +302,13 @@ static int jent_permanent_health_failure(struct rand_data *ec) * an entropy collection. * * Input: - * @ec entropy collector struct -- may be NULL * @bits is the number of low bits of the timer to consider * @min is the number of bits we shift the timer value to the right at * the end to make sure we have a guaranteed minimum value * * @return Newly calculated loop counter */ -static __u64 jent_loop_shuffle(struct rand_data *ec, - unsigned int bits, unsigned int min) +static __u64 jent_loop_shuffle(unsigned int bits, unsigned int min) { __u64 time = 0; __u64 shuffle = 0; @@ -317,12 +316,7 @@ static __u64 jent_loop_shuffle(struct rand_data *ec, unsigned int mask = (1<data; + /* * We fold the time value as much as possible to ensure that as many * bits of the time stamp are included as possible. @@ -344,81 +338,32 @@ static __u64 jent_loop_shuffle(struct rand_data *ec, * execution time jitter * * This function injects the individual bits of the time value into the - * entropy pool using an LFSR. + * entropy pool using a hash. * - * The code is deliberately inefficient with respect to the bit shifting - * and shall stay that way. This function is the root cause why the code - * shall be compiled without optimization. This function not only acts as - * folding operation, but this function's execution is used to measure - * the CPU execution time jitter. Any change to the loop in this function - * implies that careful retesting must be done. - * - * @ec [in] entropy collector struct - * @time [in] time stamp to be injected - * @loop_cnt [in] if a value not equal to 0 is set, use the given value as - * number of loops to perform the folding - * @stuck [in] Is the time stamp identified as stuck? + * ec [in] entropy collector + * time [in] time stamp to be injected + * stuck [in] Is the time stamp identified as stuck? * * Output: - * updated ec->data - * - * @return Number of loops the folding operation is performed + * updated hash context in the entropy collector or error code */ -static void jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt, - int stuck) +static int jent_condition_data(struct rand_data *ec, __u64 time, int stuck) { - unsigned int i; - __u64 j = 0; - __u64 new = 0; -#define MAX_FOLD_LOOP_BIT 4 -#define MIN_FOLD_LOOP_BIT 0 - __u64 fold_loop_cnt = - jent_loop_shuffle(ec, MAX_FOLD_LOOP_BIT, MIN_FOLD_LOOP_BIT); - - /* - * testing purposes -- allow test app to set the counter, not - * needed during runtime - */ - if (loop_cnt) - fold_loop_cnt = loop_cnt; - for (j = 0; j < fold_loop_cnt; j++) { - new = ec->data; - for (i = 1; (DATA_SIZE_BITS) >= i; i++) { - __u64 tmp = time << (DATA_SIZE_BITS - i); - - tmp = tmp >> (DATA_SIZE_BITS - 1); - - /* - * Fibonacci LSFR with polynomial of - * x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is - * primitive according to - * http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf - * (the shift values are the polynomial values minus one - * due to counting bits from 0 to 63). As the current - * position is always the LSB, the polynomial only needs - * to shift data in from the left without wrap. - */ - tmp ^= ((new >> 63) & 1); - tmp ^= ((new >> 60) & 1); - tmp ^= ((new >> 55) & 1); - tmp ^= ((new >> 30) & 1); - tmp ^= ((new >> 27) & 1); - tmp ^= ((new >> 22) & 1); - new <<= 1; - new ^= tmp; - } - } - - /* - * If the time stamp is stuck, do not finally insert the value into - * the entropy pool. Although this operation should not do any harm - * even when the time stamp has no entropy, SP800-90B requires that - * any conditioning operation (SP800-90B considers the LFSR to be a - * conditioning operation) to have an identical amount of input - * data according to section 3.1.5. - */ - if (!stuck) - ec->data = new; +#define SHA3_HASH_LOOP (1<<3) + struct { + int rct_count; + unsigned int apt_observations; + unsigned int apt_count; + unsigned int apt_base; + } addtl = { + ec->rct_count, + ec->apt_observations, + ec->apt_count, + ec->apt_base + }; + + return jent_hash_time(ec->hash_state, time, (u8 *)&addtl, sizeof(addtl), + SHA3_HASH_LOOP, stuck); } /* @@ -452,7 +397,7 @@ static void jent_memaccess(struct rand_data *ec, __u64 loop_cnt) #define MAX_ACC_LOOP_BIT 7 #define MIN_ACC_LOOP_BIT 0 __u64 acc_loop_cnt = - jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); + jent_loop_shuffle(MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); if (NULL == ec || NULL == ec->mem) return; @@ -520,14 +465,15 @@ static int jent_measure_jitter(struct rand_data *ec) stuck = jent_stuck(ec, current_delta); /* Now call the next noise sources which also injects the data */ - jent_lfsr_time(ec, current_delta, 0, stuck); + if (jent_condition_data(ec, current_delta, stuck)) + stuck = 1; return stuck; } /* * Generator of one 64 bit random number - * Function fills rand_data->data + * Function fills rand_data->hash_state * * @ec [in] Reference to entropy collector */ @@ -574,7 +520,7 @@ static void jent_gen_entropy(struct rand_data *ec) * @return 0 when request is fulfilled or an error * * The following error codes can occur: - * -1 entropy_collector is NULL + * -1 entropy_collector is NULL or the generation failed * -2 Intermittent health failure * -3 Permanent health failure */ @@ -604,7 +550,7 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, * Perform startup health tests and return permanent * error if it fails. */ - if (jent_entropy_init()) + if (jent_entropy_init(ec->hash_state)) return -3; return -2; @@ -614,7 +560,8 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, tocopy = (DATA_SIZE_BITS / 8); else tocopy = len; - jent_memcpy(p, &ec->data, tocopy); + if (jent_read_random_block(ec->hash_state, p, tocopy)) + return -1; len -= tocopy; p += tocopy; @@ -628,7 +575,8 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, ***************************************************************************/ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, - unsigned int flags) + unsigned int flags, + void *hash_state) { struct rand_data *entropy_collector; @@ -655,6 +603,8 @@ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, osr = 1; /* minimum sampling rate is 1 */ entropy_collector->osr = osr; + entropy_collector->hash_state = hash_state; + /* fill the data pad with non-zero values */ jent_gen_entropy(entropy_collector); @@ -668,7 +618,7 @@ void jent_entropy_collector_free(struct rand_data *entropy_collector) jent_zfree(entropy_collector); } -int jent_entropy_init(void) +int jent_entropy_init(void *hash_state) { int i; __u64 delta_sum = 0; @@ -681,6 +631,7 @@ int jent_entropy_init(void) /* Required for RCT */ ec.osr = 1; + ec.hash_state = hash_state; /* We could perform statistical tests here, but the problem is * that we only have a few loop counts to do testing. These @@ -718,7 +669,7 @@ int jent_entropy_init(void) /* Invoke core entropy collection logic */ jent_get_nstime(&time); ec.prev_time = time; - jent_lfsr_time(&ec, time, 0, 0); + jent_condition_data(&ec, time, 0); jent_get_nstime(&time2); /* test whether timer works */ diff --git a/crypto/jitterentropy.h b/crypto/jitterentropy.h index 5cc583f6bc6b8..b3890ff26a023 100644 --- a/crypto/jitterentropy.h +++ b/crypto/jitterentropy.h @@ -2,14 +2,18 @@ extern void *jent_zalloc(unsigned int len); extern void jent_zfree(void *ptr); -extern void jent_memcpy(void *dest, const void *src, unsigned int n); extern void jent_get_nstime(__u64 *out); +extern int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, + unsigned int addtl_len, __u64 hash_loop_cnt, + unsigned int stuck); +int jent_read_random_block(void *hash_state, char *dst, unsigned int dst_len); struct rand_data; -extern int jent_entropy_init(void); +extern int jent_entropy_init(void *hash_state); extern int jent_read_entropy(struct rand_data *ec, unsigned char *data, unsigned int len); extern struct rand_data *jent_entropy_collector_alloc(unsigned int osr, - unsigned int flags); + unsigned int flags, + void *hash_state); extern void jent_entropy_collector_free(struct rand_data *entropy_collector); diff --git a/crypto/rng.c b/crypto/rng.c index c650678106a7f..011a510492580 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -6,6 +6,9 @@ * * Copyright (c) 2008 Neil Horman * Copyright (c) 2015 Herbert Xu + * + * Copyright (C) 2025 Ctrl IQ, Inc. + * Author: Sultan Alsawaf */ #include @@ -14,10 +17,9 @@ #include #include #include -#include #include +#include #include -#include #include #include #include @@ -26,12 +28,39 @@ #include "internal.h" -static ____cacheline_aligned_in_smp DEFINE_MUTEX(crypto_reseed_rng_lock); -static struct crypto_rng *crypto_reseed_rng; -static ____cacheline_aligned_in_smp DEFINE_MUTEX(crypto_default_rng_lock); +static ____cacheline_aligned_in_smp DEFINE_RT_MUTEX(crypto_default_rng_lock); struct crypto_rng *crypto_default_rng; EXPORT_SYMBOL_GPL(crypto_default_rng); -static int crypto_default_rng_refcnt; +static unsigned int crypto_default_rng_refcnt; +static bool drbg_registered __ro_after_init; + +/* + * Per-CPU RNG instances are only used by crypto_devrandom_rng. The global RNG, + * crypto_default_rng, is only used directly by other drivers. + * + * Per-CPU instances of the DRBG are efficient because the DRBG itself supports + * an arbitrary number of instances and can be seeded on a per-CPU basis. + * + * Specifically, the DRBG is seeded by the CRNG and the Jitter RNG. The CRNG is + * globally accessible and is already per-CPU. And while the Jitter RNG _isn't_ + * per-CPU, creating a DRBG instance also creates a Jitter RNG instance; + * therefore, per-CPU DRBG instances implies per-CPU Jitter RNG instances. + */ +struct cpu_rng_inst { + local_lock_t lock; + struct rt_mutex mlock; + struct crypto_rng *rng; + void *page; +}; + +static DEFINE_PER_CPU_ALIGNED(struct cpu_rng_inst, pcpu_default_rng) = { + .lock = INIT_LOCAL_LOCK(pcpu_default_rng.lock), + .mlock = __RT_MUTEX_INITIALIZER(pcpu_default_rng.mlock) +}; +static DEFINE_PER_CPU_ALIGNED(struct cpu_rng_inst, pcpu_reseed_rng) = { + /* The reseed instances don't use the local lock */ + .mlock = __RT_MUTEX_INITIALIZER(pcpu_reseed_rng.mlock) +}; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { @@ -145,11 +174,11 @@ int crypto_get_default_rng(void) { int err; - mutex_lock(&crypto_default_rng_lock); + rt_mutex_lock(&crypto_default_rng_lock); err = crypto_get_rng(&crypto_default_rng); if (!err) crypto_default_rng_refcnt++; - mutex_unlock(&crypto_default_rng_lock); + rt_mutex_unlock(&crypto_default_rng_lock); return err; } @@ -157,39 +186,25 @@ EXPORT_SYMBOL_GPL(crypto_get_default_rng); void crypto_put_default_rng(void) { - mutex_lock(&crypto_default_rng_lock); + rt_mutex_lock(&crypto_default_rng_lock); crypto_default_rng_refcnt--; - mutex_unlock(&crypto_default_rng_lock); + rt_mutex_unlock(&crypto_default_rng_lock); } EXPORT_SYMBOL_GPL(crypto_put_default_rng); #if defined(CONFIG_CRYPTO_RNG) || defined(CONFIG_CRYPTO_RNG_MODULE) -static int crypto_del_rng(struct crypto_rng **rngp, int *refcntp, - struct mutex *lock) +int crypto_del_default_rng(void) { - int err = -EBUSY; + bool busy; - mutex_lock(lock); - if (refcntp && *refcntp) - goto out; - - crypto_free_rng(*rngp); - *rngp = NULL; - - err = 0; - -out: - mutex_unlock(lock); - - return err; -} + rt_mutex_lock(&crypto_default_rng_lock); + if (!(busy = crypto_default_rng_refcnt)) { + crypto_free_rng(crypto_default_rng); + crypto_default_rng = NULL; + } + rt_mutex_unlock(&crypto_default_rng_lock); -int crypto_del_default_rng(void) -{ - return crypto_del_rng(&crypto_default_rng, &crypto_default_rng_refcnt, - &crypto_default_rng_lock) ?: - crypto_del_rng(&crypto_reseed_rng, NULL, - &crypto_reseed_rng_lock); + return busy ? -EBUSY : 0; } EXPORT_SYMBOL_GPL(crypto_del_default_rng); #endif @@ -201,6 +216,19 @@ int crypto_register_rng(struct rng_alg *alg) if (alg->seedsize > PAGE_SIZE / 8) return -EINVAL; + /* + * In FIPS mode, the DRBG must take precedence over all other "stdrng" + * algorithms. Therefore, forbid registration of a non-DRBG stdrng in + * FIPS mode. All of the DRBG's driver names are prefixed with "drbg_". + * This also stops new stdrng instances from getting registered after it + * is known that the DRBG is registered, so a new module can't come in + * and pretend to be the DRBG. And when CONFIG_CRYPTO_FIPS is enabled, + * the DRBG is built into the kernel directly; it can't be a module. + */ + if (fips_enabled && !strcmp(base->cra_name, "stdrng") && + (drbg_registered || strncmp(base->cra_driver_name, "drbg_", 5))) + return -EINVAL; + base->cra_type = &crypto_rng_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_RNG; @@ -225,6 +253,18 @@ int crypto_register_rngs(struct rng_alg *algs, int count) goto err; } + /* + * Track when the DRBG is registered in FIPS mode. The DRBG calls + * crypto_register_rngs() to register its stdrng instances, and since + * crypto_register_rng() only allows stdrng instances from the DRBG in + * FIPS mode, a successful stdrng registration means it was the DRBG. + * Just check the first alg in the array to see if it's called "stdrng", + * since all of the DRBG's algorithms are named "stdrng". Once + * drbg_registered is set to true, this if-statement is always false. + */ + if (fips_enabled && !strcmp(algs->base.cra_name, "stdrng")) + drbg_registered = true; + return 0; err: @@ -244,102 +284,375 @@ void crypto_unregister_rngs(struct rng_alg *algs, int count) } EXPORT_SYMBOL_GPL(crypto_unregister_rngs); -static ssize_t crypto_devrandom_read_iter(struct iov_iter *iter, bool reseed) +/* + * On non-PREEMPT_RT kernels, local locks disable preemption. When there's no + * rng allocated, one must be allocated by calling crypto_get_rng(), which can + * sleep. Therefore, crypto_get_rng() cannot be called under local_lock(), so if + * our CPU's RNG instance doesn't have an rng allocated, we drop the local lock + * and take a mutex lock instead. After the local lock is dropped, the current + * task can be freely migrated to another CPU, which means that calling + * local_lock() again might not result in the same instance getting locked as + * before. That's why this function exists: to loop on calling local_lock() and + * allocating an rng as needed with crypto_get_rng() until the current CPU's + * instance is found to have an rng allocated. If crypto_get_rng() ever fails, + * this function returns an error even if there are instances for other CPUs + * which _do_ have an rng allocated. + */ +static __always_inline struct cpu_rng_inst * +lock_default_rng(struct crypto_rng **rng) __acquires(&cri->lock) { - struct crypto_rng *rng; - u8 tmp[256]; - ssize_t ret; + struct cpu_rng_inst __percpu *pcri = &pcpu_default_rng; + struct cpu_rng_inst *cri; + int ret; + + while (1) { + local_lock(&pcri->lock); + cri = this_cpu_ptr(pcri); + /* + * cri->rng can only transition from NULL to non-NULL. This may + * occur on a different CPU, thus cri->rng must be read + * atomically to prevent data races; this elides mlock by + * pairing with the WRITE_ONCE() in the slow path below. + * + * And if cri->rng is non-NULL, then it is good to go. To avoid + * data races due to load speculation on torn cri->rng loads + * _after_ the NULL check, one of the following is required: + * 1. smp_acquire__after_ctrl_dep() in the if-statement + * 2. All cri->rng reads are performed with READ_ONCE() + * 3. cri->rng is never read again outside this function + * + * Option #3 yields the best performance, so this function + * provides the rng pointer as an output for the caller to use. + */ + *rng = READ_ONCE(cri->rng); + if (likely(*rng)) + return cri; + + /* + * Slow path: there's no rng currently allocated to this instance. + * Release the local lock and acquire this instance's mlock to + * perform the allocation. + * + * Note that this task may be migrated to a different CPU now! + */ + local_unlock(&cri->lock); + rt_mutex_lock(&cri->mlock); + if (!cri->rng) { + struct crypto_rng *new_rng = NULL; + + ret = crypto_get_rng(&new_rng); + if (ret) { + rt_mutex_unlock(&cri->mlock); + break; + } - if (unlikely(!iov_iter_count(iter))) - return 0; + /* + * Pairs with READ_ONCE() above, because we might not be + * on the same CPU anymore as when we first got `cri`. + */ + WRITE_ONCE(cri->rng, new_rng); + } + rt_mutex_unlock(&cri->mlock); + } - if (reseed) { - u32 flags = 0; + /* + * Even if this task got migrated to another CPU that _does_ have an rng + * allocated, just bail out if crypto_get_rng() ever fails in order to + * avoid looping forever. + */ + return ERR_PTR(ret); +} - /* If reseeding is requested, acquire a lock on - * crypto_reseed_rng so it is not swapped out until - * the initial random bytes are generated. - * - * The algorithm implementation is also protected with - * a separate mutex (drbg->drbg_mutex) around the - * reseed-and-generate operation. +static __always_inline struct cpu_rng_inst * +lock_reseed_rng(struct crypto_rng **rng) __acquires(&cri->mlock) +{ + struct cpu_rng_inst __percpu *pcri = &pcpu_reseed_rng; + struct cpu_rng_inst *cri; + int ret; + + /* + * Use whichever CPU this task is currently running on, knowing full + * well that the task can freely migrate to other CPUs. The reseed RNG + * requires holding a lock across the entire devrandom read, so that + * another task cannot extract entropy from the same seed. In other + * words, when reseeding is requested, reseeding must be done every time + * every time mlock is acquired. + */ + cri = raw_cpu_ptr(pcri); + rt_mutex_lock(&cri->mlock); + if (likely(cri->rng)) { + /* + * Since this rng instance wasn't just allocated, it needs to be + * explicitly reseeded. New rng instances are seeded on creation + * in crypto_get_rng() and thus don't need explicit reseeding. */ - mutex_lock(&crypto_reseed_rng_lock); + crypto_tfm_set_flags(crypto_rng_tfm(cri->rng), + CRYPTO_TFM_REQ_NEED_RESEED); + } else { + ret = crypto_get_rng(&cri->rng); + if (ret) { + rt_mutex_unlock(&cri->mlock); + return ERR_PTR(ret); + } + } - /* If crypto_default_rng is not set, it will be seeded - * at creation in __crypto_get_default_rng and thus no - * reseeding is needed. - */ - if (crypto_reseed_rng) - flags |= CRYPTO_TFM_REQ_NEED_RESEED; + *rng = cri->rng; + return cri; +} - ret = crypto_get_rng(&crypto_reseed_rng); - if (ret) { - mutex_unlock(&crypto_reseed_rng_lock); - return ret; +#define lock_local_rng(rng, reseed) \ + ({ (reseed) ? lock_reseed_rng(rng) : lock_default_rng(rng); }) + +#define unlock_local_rng(cri, reseed) \ +do { \ + if (reseed) \ + rt_mutex_unlock(&(cri)->mlock); \ + else \ + local_unlock(&(cri)->lock); \ +} while (0) + +static __always_inline void +clear_rng_page(struct cpu_rng_inst *cri, size_t count) +{ + /* For zeroing a whole page, clear_page() is faster than memset() */ + count < PAGE_SIZE ? memset(cri->page, 0, count) : clear_page(cri->page); +} + +static ssize_t crypto_devrandom_read_iter(struct iov_iter *iter, bool reseed) +{ + /* lock_local_rng() puts us in atomic context for !reseed on non-RT */ + const bool atomic = !reseed && !IS_ENABLED(CONFIG_PREEMPT_RT); + const bool user_no_reseed = !reseed && user_backed_iter(iter); + size_t ulen, page_dirty_len = 0; + struct cpu_rng_inst *cri; + struct crypto_rng *rng; + void __user *uaddr; + struct page *upage; + ssize_t ret = 0; + + if (unlikely(!iov_iter_count(iter))) + return 0; + + /* Set up the starting user destination address and length */ + if (user_no_reseed) { + if (iter_is_ubuf(iter)) { + uaddr = iter->ubuf + iter->iov_offset; + ulen = iov_iter_count(iter); + } else if (iter_is_iovec(iter)) { + uaddr = iter_iov_addr(iter); + ulen = iter_iov_len(iter); + } else { + /* + * ITER_UBUF and ITER_IOVEC are the only user-backed + * iters. Bug out if a new user-backed iter appears. + */ + BUG(); } + } - rng = crypto_reseed_rng; - crypto_tfm_set_flags(crypto_rng_tfm(rng), flags); - } else { - ret = crypto_get_default_rng(); - if (ret) - return ret; - rng = crypto_default_rng; +restart: + /* + * Pin the user page backing the current user destination address, + * potentially prefaulting to allocate a page for the destination. By + * prefaulting without the RNG lock held, the DRBG won't be blocked by + * time spent on page faults for this task, and thus the DRBG can still + * be used by other tasks. + */ + if (user_no_reseed && pin_user_pages_fast((unsigned long)uaddr, 1, + FOLL_WRITE, &upage) != 1) + goto exit; + + cri = lock_local_rng(&rng, reseed); + if (IS_ERR(cri)) { + if (!ret) + ret = PTR_ERR(cri); + goto unpin_upage; } - for (;;) { - size_t i, copied; + while (1) { + size_t copied, i = min(iov_iter_count(iter), PAGE_SIZE); + bool resched_without_lock = false; int err; - i = min_t(size_t, iov_iter_count(iter), sizeof(tmp)); - err = crypto_rng_get_bytes(rng, tmp, i); + /* + * Generate up to one page at a time, and align to a page + * boundary so we only need to pin one user page at a time. + */ + if (user_no_reseed) + i = min3(i, PAGE_SIZE - offset_in_page(uaddr), ulen); + + /* + * On non-PREEMPT_RT kernels, local locks disable preemption. + * The DRBG's generate() function has a mutex lock, which could + * mean that we'll schedule while atomic if the mutex lock + * sleeps. However, that will never happen if we ensure that + * there's never any contention on the DRBG's mutex lock while + * we're atomic! Our local lock ensures calls to the DRBG are + * always serialized, so there's no contention from here. And + * the DRBG only uses its mutex lock from one other path, when + * an instance of the DRBG is freshly allocated, which we only + * do from crypto_get_rng(). So the DRBG's mutex lock is + * guaranteed to not have contention when we call generate() and + * thus it'll never sleep here. And of course, nothing else in + * generate() ever sleeps. + */ + err = crypto_rng_get_bytes(rng, cri->page, i); if (err) { - ret = ret ?: err; + if (!ret) + ret = err; break; } - copied = copy_to_iter(tmp, i, iter); - ret += copied; + /* + * Record the number of bytes used in cri->page and either copy + * directly to the user address without faulting, or copy to the + * iter which is always backed by kernel memory when !reseed && + * !user_backed_iter(). When reseed == true, the iter may be + * backed by user memory, but we copy to it with the possibility + * of page faults anyway because we need to hold the lock across + * the entire call; this is why a mutex is used instead of a + * local lock for the reseed RNG, to permit sleeping without + * yielding the DRBG instance. + */ + page_dirty_len = max(i, page_dirty_len); + if (user_no_reseed) { + err = copy_to_user_nofault(uaddr, cri->page, i); + if (err >= 0) { + iov_iter_advance(iter, i - err); + ret += i - err; + } + if (err) + break; + } else { + /* + * We know that copying from cri->page is safe, so use + * _copy_to_iter() directly to skip check_copy_size(). + */ + copied = _copy_to_iter(cri->page, i, iter); + ret += copied; + if (copied != i) + break; + } - if (!iov_iter_count(iter) || copied != i) + /* + * Quit when either the requested number of bytes have been + * generated or there is a pending signal. + */ + if (!iov_iter_count(iter) || signal_pending(current)) break; - BUILD_BUG_ON(PAGE_SIZE % sizeof(tmp) != 0); - if (ret % PAGE_SIZE == 0) { - if (signal_pending(current)) - break; - cond_resched(); + /* Compute the next user destination address and length */ + if (user_no_reseed) { + ulen -= i; + if (likely(ulen)) { + uaddr += i; + } else { + /* + * This path is only reachable by ITER_IOVEC + * because ulen is initialized to the request + * size for ITER_UBUF, and therefore ITER_UBUF + * will always quit at the iov_iter_count() + * check above before ulen can become zero. + * + * iter->iov_offset is guaranteed to be zero + * here, so iter_iov_{addr|len}() isn't needed. + */ + uaddr = iter_iov(iter)->iov_base; + ulen = iter_iov(iter)->iov_len; + } + + unpin_user_page(upage); + } + + /* + * Reschedule right now if needed and we're not atomic. If we're + * atomic, then we must first drop the lock to reschedule. + */ + if (need_resched()) { + if (atomic) + resched_without_lock = true; + else + cond_resched(); + } + + /* + * Optimistically try to pin the next user page without + * faulting, so we don't need to clear cri->page and drop the + * lock on every iteration. If this fails, we fall back to + * pinning with the option to prefault. + */ + if (user_no_reseed && !resched_without_lock && + pin_user_pages_fast_only((unsigned long)uaddr, 1, + FOLL_WRITE, &upage) == 1) + continue; + + /* + * Restart if either rescheduling is needed (and requires + * dropping the lock since we're atomic) or the optimistic page + * pinning attempt failed. + * + * This always implies `reseed == false`, so unlock_local_rng() + * can just be passed `false` for reseed to eliminate a branch. + */ + if (resched_without_lock || user_no_reseed) { + /* + * Clear the buffer of our latest random bytes before + * unlocking and potentially migrating CPUs, in which + * case we wouldn't have the same `cri` anymore. + */ + clear_rng_page(cri, page_dirty_len); + unlock_local_rng(cri, false); + page_dirty_len = 0; + if (resched_without_lock) + cond_resched(); + goto restart; } } - if (reseed) - mutex_unlock(&crypto_reseed_rng_lock); - else - crypto_put_default_rng(); - memzero_explicit(tmp, sizeof(tmp)); + if (page_dirty_len) + clear_rng_page(cri, page_dirty_len); + unlock_local_rng(cri, reseed); +unpin_upage: + if (user_no_reseed) + unpin_user_page(upage); +exit: return ret ? ret : -EFAULT; } static const struct random_extrng crypto_devrandom_rng = { - .extrng_read_iter = crypto_devrandom_read_iter, - .owner = THIS_MODULE, + .extrng_read_iter = crypto_devrandom_read_iter }; -static int __init crypto_rng_init(void) +static void __init alloc_pcpu_inst(struct cpu_rng_inst __percpu *pcri) { - if (fips_enabled) - random_register_extrng(&crypto_devrandom_rng); - return 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_rng_inst *cri = per_cpu_ptr(pcri, cpu); + + cri->page = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + local_lock_init(&cri->lock); + } } -static void __exit crypto_rng_exit(void) +static int __init crypto_rng_init(void) { - random_unregister_extrng(); + if (!fips_enabled) + return 0; + + /* + * Never fail to register the RNG override in FIPS mode because failure + * would result in the system quietly booting without the FIPS-mandated + * RNG installed. This would be catastrophic for FIPS compliance, hence + * the RNG override setup is *not* allowed to fail. + */ + alloc_pcpu_inst(&pcpu_default_rng); + alloc_pcpu_inst(&pcpu_reseed_rng); + random_register_extrng(&crypto_devrandom_rng); + return 0; } late_initcall(crypto_rng_init); -module_exit(crypto_rng_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Random Number Generator"); diff --git a/drivers/char/random.c b/drivers/char/random.c index 317a0b15dc34c..5fe3118a3c278 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -314,7 +313,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], /* * Hook for external RNG. */ -static const struct random_extrng __rcu *extrng; +static const struct random_extrng *extrng __ro_after_init; /* * This function returns a ChaCha state that you may use for generating @@ -966,18 +965,12 @@ void __init add_bootloader_randomness(const void *buf, size_t len) credit_init_bits(len * 8); } -void random_register_extrng(const struct random_extrng *rng) +void __init random_register_extrng(const struct random_extrng *rng) { - rcu_assign_pointer(extrng, rng); + /* Don't allow the registered extrng to be overridden */ + BUG_ON(extrng); + extrng = rng; } -EXPORT_SYMBOL_GPL(random_register_extrng); - -void random_unregister_extrng(void) -{ - RCU_INIT_POINTER(extrng, NULL); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(random_unregister_extrng); #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); @@ -1386,7 +1379,6 @@ static void __cold try_to_generate_entropy(void) SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { - const struct random_extrng *rng; struct iov_iter iter; struct iovec iov; int ret; @@ -1404,19 +1396,11 @@ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags if (len > INT_MAX) len = INT_MAX; - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (rng) { + if (extrng) { ret = import_single_range(READ, ubuf, len, &iov, &iter); if (unlikely(ret)) return ret; - ret = rng->extrng_read_iter(&iter, !!(flags & GRND_RANDOM)); - module_put(rng->owner); - return ret; + return extrng->extrng_read_iter(&iter, !!(flags & GRND_RANDOM)); } if (!crng_ready() && !(flags & GRND_INSECURE)) { @@ -1589,52 +1573,24 @@ static int random_fasync(int fd, struct file *filp, int on) static int random_open(struct inode *inode, struct file *filp) { - const struct random_extrng *rng; - - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (!rng) - return 0; - - filp->f_op = &extrng_random_fops; - filp->private_data = rng->owner; + if (extrng) + filp->f_op = &extrng_random_fops; return 0; } static int urandom_open(struct inode *inode, struct file *filp) { - const struct random_extrng *rng; + if (extrng) + filp->f_op = &extrng_urandom_fops; - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (!rng) - return 0; - - filp->f_op = &extrng_urandom_fops; - filp->private_data = rng->owner; - - return 0; -} - -static int extrng_release(struct inode *inode, struct file *filp) -{ - module_put(filp->private_data); return 0; } static ssize_t extrng_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { - return rcu_dereference_raw(extrng)->extrng_read_iter(iter, false); + return extrng->extrng_read_iter(iter, false); } const struct file_operations random_fops = { @@ -1670,7 +1626,6 @@ static const struct file_operations extrng_random_fops = { .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .release = extrng_release, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, }; @@ -1682,7 +1637,6 @@ static const struct file_operations extrng_urandom_fops = { .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .release = extrng_release, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, }; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fb8cd8469328e..35f26fa1ffe76 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -1077,68 +1077,10 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, EXPORT_SYMBOL(vmbus_sendpacket); /* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. This interface allows you - * to control notifying the host. This will be useful for sending - * batched data. Also the sender can control the send flags - * explicitly. - */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) -{ - int i; - struct vmbus_channel_packet_page_buffer desc; - u32 descsize; - u32 packetlen; - u32 packetlen_aligned; - struct kvec bufferlist[3]; - u64 aligned_data = 0; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; - - /* - * Adjust the size down since vmbus_channel_packet_page_buffer is the - * largest size we support - */ - descsize = sizeof(struct vmbus_channel_packet_page_buffer) - - ((MAX_PAGE_BUFFER_COUNT - pagecount) * - sizeof(struct hv_page_buffer)); - packetlen = descsize + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); - - /* Setup the descriptor */ - desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; - desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ - desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ - desc.reserved = 0; - desc.rangecount = pagecount; - - for (i = 0; i < pagecount; i++) { - desc.range[i].len = pagebuffers[i].len; - desc.range[i].offset = pagebuffers[i].offset; - desc.range[i].pfn = pagebuffers[i].pfn; - } - - bufferlist[0].iov_base = &desc; - bufferlist[0].iov_len = descsize; - bufferlist[1].iov_base = buffer; - bufferlist[1].iov_len = bufferlen; - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - return hv_ringbuffer_write(channel, bufferlist, 3, requestid, NULL); -} -EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); - -/* - * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets * using a GPADL Direct packet type. - * The buffer includes the vmbus descriptor. + * The desc argument must include space for the VMBus descriptor. The + * rangecount field must already be set. */ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *desc, @@ -1160,7 +1102,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->length8 = (u16)(packetlen_aligned >> 3); desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; - desc->rangecount = 1; bufferlist[0].iov_base = desc; bufferlist[0].iov_len = desc_size; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fd23530e65b11..763e19d73810f 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -157,7 +157,6 @@ struct hv_netvsc_packet { u8 cp_partial; /* partial copy into send buffer */ u8 rmsg_size; /* RNDIS header and PPI size */ - u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ u8 page_buf_cnt; u16 q_idx; @@ -892,6 +891,18 @@ struct nvsp_message { sizeof(struct nvsp_message)) #define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) +/* Maximum # of contiguous data ranges that can make up a trasmitted packet. + * Typically it's the max SKB fragments plus 2 for the rndis packet and the + * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may + * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries + * in a GPA direct packet sent to netvsp over VMBus. + */ +#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT +#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2) +#else +#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT +#endif + /* Estimated requestor size: * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 9afb08dbc350a..87ac2a5f18091 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -952,8 +952,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + pend_size; int i; u32 padding = 0; - u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : - packet->page_buf_cnt; + u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt; u32 remain; /* Add padding */ @@ -1054,6 +1053,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev, return 0; } +/* Build an "array" of mpb entries describing the data to be transferred + * over VMBus. After the desc header fields, each "array" entry is variable + * size, and each entry starts after the end of the previous entry. The + * "offset" and "len" fields for each entry imply the size of the entry. + * + * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V + * uses that granularity, even if the system page size of the guest is larger. + * Each entry in the input "pb" array must describe a contiguous range of + * guest physical memory so that the pfns are sequential if the range crosses + * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE. + */ +static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb, + u32 page_buffer_count, + struct vmbus_packet_mpb_array *desc, + u32 *desc_size) +{ + struct hv_mpb_array *mpb_entry = &desc->range; + int i, j; + + for (i = 0; i < page_buffer_count; i++) { + u32 offset = pb[i].offset; + u32 len = pb[i].len; + + mpb_entry->offset = offset; + mpb_entry->len = len; + + for (j = 0; j < HVPFN_UP(offset + len); j++) + mpb_entry->pfn_array[j] = pb[i].pfn + j; + + mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j]; + } + + desc->rangecount = page_buffer_count; + *desc_size = (char *)mpb_entry - (char *)desc; +} + static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, @@ -1096,8 +1131,11 @@ static inline int netvsc_send_pkt( packet->dma_range = NULL; if (packet->page_buf_cnt) { + struct vmbus_channel_packet_page_buffer desc; + u32 desc_size; + if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1105,11 +1143,12 @@ static inline int netvsc_send_pkt( goto exit; } - ret = vmbus_sendpacket_pagebuffer(out_channel, - pb, packet->page_buf_cnt, - &nvmsg, sizeof(nvmsg), - req_id); - + netvsc_build_mpb_array(pb, packet->page_buf_cnt, + (struct vmbus_packet_mpb_array *)&desc, + &desc_size); + ret = vmbus_sendpacket_mpb_desc(out_channel, + (struct vmbus_packet_mpb_array *)&desc, + desc_size, &nvmsg, sizeof(nvmsg), req_id); if (ret) netvsc_dma_unmap(ndev_ctx->device_ctx, packet); } else { @@ -1258,7 +1297,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 86c3bdad0f1b5..0fe75c9ae4c22 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -325,43 +325,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -370,28 +337,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -482,7 +448,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 82747dfacd70f..9e73959e61ee0 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { struct hv_netvsc_packet *packet; - struct hv_page_buffer page_buf[2]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb; int ret; /* Setup the packet to send it */ @@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->total_data_buflen = req->request_msg.msg_len; packet->page_buf_cnt = 1; - pb[0].pfn = virt_to_phys(&req->request_msg) >> - HV_HYP_PAGE_SHIFT; - pb[0].len = req->request_msg.msg_len; - pb[0].offset = offset_in_hvpage(&req->request_msg); - - /* Add one page_buf when request_msg crossing page boundary */ - if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) { - packet->page_buf_cnt++; - pb[0].len = HV_HYP_PAGE_SIZE - - pb[0].offset; - pb[1].pfn = virt_to_phys((void *)&req->request_msg - + pb[0].len) >> HV_HYP_PAGE_SHIFT; - pb[1].offset = 0; - pb[1].len = req->request_msg.msg_len - - pb[0].len; - } + pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT; + pb.len = req->request_msg.msg_len; + pb.offset = offset_in_hvpage(&req->request_msg); trace_rndis_send(dev->ndev, 0, &req->request_msg); rcu_read_lock_bh(); - ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false); + ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false); rcu_read_unlock_bh(); return ret; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 9ede8e35b19ab..c12240753d376 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -356,7 +356,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater, /* * Timeout in seconds for all devices managed by this driver. */ -static int storvsc_timeout = 180; +static const int storvsc_timeout = 180; #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; @@ -762,7 +762,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) return; } - t = wait_for_completion_timeout(&request->wait_event, 10*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) { dev_err(dev, "Failed to create sub-channel: timed out\n"); return; @@ -827,7 +827,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device, if (ret != 0) return ret; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return -ETIMEDOUT; @@ -1345,6 +1345,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, return ret; ret = storvsc_channel_init(device, is_fc); + if (ret) + vmbus_close(device->channel); return ret; } @@ -1662,7 +1664,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) if (ret != 0) return FAILED; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return TIMEOUT_ERROR; @@ -1813,6 +1815,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) return SCSI_MLQUEUE_DEVICE_BUSY; } + payload->rangecount = 1; payload->range.len = length; payload->range.offset = offset_in_hvpg; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 550ad87c38b6c..ccb2ef2fa1e93 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1226,13 +1226,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, diff --git a/include/linux/mm.h b/include/linux/mm.h index ab100f6bd25ad..3f1772eb24261 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2571,6 +2571,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +int pin_user_pages_fast_only(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages); void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); diff --git a/include/linux/random.h b/include/linux/random.h index d4cabe51e9434..9bde794ec8d93 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -9,12 +9,6 @@ #include -struct iov_iter; -struct random_extrng { - ssize_t (*extrng_read_iter)(struct iov_iter *, bool reseed); - struct module *owner; -}; - struct notifier_block; void add_device_randomness(const void *buf, size_t len); @@ -42,9 +36,6 @@ static inline int register_random_vmfork_notifier(struct notifier_block *nb) { r static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } #endif -void random_register_extrng(const struct random_extrng *rng); -void random_unregister_extrng(void); - void get_random_bytes(void *buf, size_t len); u8 get_random_u8(void); u16 get_random_u16(void); @@ -173,6 +164,13 @@ int random_online_cpu(unsigned int cpu); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; + +struct iov_iter; +struct random_extrng { + ssize_t (*extrng_read_iter)(struct iov_iter *iter, bool reseed); +}; + +void __init random_register_extrng(const struct random_extrng *rng); #endif #endif /* _LINUX_RANDOM_H */ diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e57a4f..e16dca1e23d52 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); diff --git a/mm/gup.c b/mm/gup.c index 16cdddef91585..40b8a69fc312f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3359,6 +3359,20 @@ int pin_user_pages_fast(unsigned long start, int nr_pages, } EXPORT_SYMBOL_GPL(pin_user_pages_fast); +/* + * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior is + * the same, except that this one sets FOLL_PIN instead of FOLL_GET. + */ +int pin_user_pages_fast_only(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) +{ + if (!is_valid_gup_args(pages, NULL, &gup_flags, + FOLL_PIN | FOLL_FAST_ONLY)) + return -EINVAL; + return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); +} +EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); + /** * pin_user_pages_remote() - pin pages of a remote process * diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 1e6fd6ca513bd..0e0c997134ec6 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -77,6 +77,7 @@ enum { }; static int in_hand_shake; +static int debug; static char *os_name = ""; static char *os_major = ""; @@ -172,6 +173,20 @@ static void kvp_update_file(int pool) kvp_release_lock(pool); } +static void kvp_dump_initial_pools(int pool) +{ + int i; + + syslog(LOG_DEBUG, "===Start dumping the contents of pool %d ===\n", + pool); + + for (i = 0; i < kvp_file_info[pool].num_records; i++) + syslog(LOG_DEBUG, "pool: %d, %d/%d key=%s val=%s\n", + pool, i + 1, kvp_file_info[pool].num_records, + kvp_file_info[pool].records[i].key, + kvp_file_info[pool].records[i].value); +} + static void kvp_update_mem_state(int pool) { FILE *filep; @@ -259,6 +274,8 @@ static int kvp_file_init(void) return 1; kvp_file_info[i].num_records = 0; kvp_update_mem_state(i); + if (debug) + kvp_dump_initial_pools(i); } return 0; @@ -286,6 +303,9 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ + if (debug) + syslog(LOG_DEBUG, "%s: deleting the KVP: pool=%d key=%s val=%s", + __func__, pool, record[i].key, record[i].value); if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); @@ -304,20 +324,36 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) kvp_update_file(pool); return 0; } + + if (debug) + syslog(LOG_DEBUG, "%s: could not delete KVP: pool=%d key=%s. Record not found", + __func__, pool, key); + return 1; } static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, const __u8 *value, int value_size) { - int i; - int num_records; struct kvp_record *record; + int num_records; int num_blocks; + int i; + + if (debug) + syslog(LOG_DEBUG, "%s: got a KVP: pool=%d key=%s val=%s", + __func__, pool, key, value); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || - (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { + syslog(LOG_ERR, "%s: Too long key or value: key=%s, val=%s", + __func__, key, value); + + if (debug) + syslog(LOG_DEBUG, "%s: Too long key or value: pool=%d, key=%s, val=%s", + __func__, pool, key, value); return 1; + } /* * First update the in-memory state. @@ -337,6 +373,9 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, */ memcpy(record[i].value, value, value_size); kvp_update_file(pool); + if (debug) + syslog(LOG_DEBUG, "%s: updated: pool=%d key=%s val=%s", + __func__, pool, key, value); return 0; } @@ -348,8 +387,10 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, record = realloc(record, sizeof(struct kvp_record) * ENTRIES_PER_BLOCK * (num_blocks + 1)); - if (record == NULL) + if (!record) { + syslog(LOG_ERR, "%s: Memory alloc failure", __func__); return 1; + } kvp_file_info[pool].num_blocks++; } @@ -357,6 +398,11 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, memcpy(record[i].key, key, key_size); kvp_file_info[pool].records = record; kvp_file_info[pool].num_records++; + + if (debug) + syslog(LOG_DEBUG, "%s: added: pool=%d key=%s val=%s", + __func__, pool, key, value); + kvp_update_file(pool); return 0; } @@ -1355,6 +1401,7 @@ void print_usage(char *argv[]) fprintf(stderr, "Usage: %s [options]\n" "Options are:\n" " -n, --no-daemon stay in foreground, don't daemonize\n" + " -d, --debug Enable debug logs(syslog debug by default)\n" " -h, --help print this help\n", argv[0]); } @@ -1376,10 +1423,11 @@ int main(int argc, char *argv[]) static struct option long_options[] = { {"help", no_argument, 0, 'h' }, {"no-daemon", no_argument, 0, 'n' }, + {"debug", no_argument, 0, 'd' }, {0, 0, 0, 0 } }; - while ((opt = getopt_long(argc, argv, "hn", long_options, + while ((opt = getopt_long(argc, argv, "hnd", long_options, &long_index)) != -1) { switch (opt) { case 'n': @@ -1388,6 +1436,9 @@ int main(int argc, char *argv[]) case 'h': print_usage(argv); exit(0); + case 'd': + debug = 1; + break; default: print_usage(argv); exit(EXIT_FAILURE); @@ -1410,6 +1461,9 @@ int main(int argc, char *argv[]) */ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name)); + if (debug) + syslog(LOG_INFO, "Logging debug info in syslog(debug)"); + if (kvp_file_init()) { syslog(LOG_ERR, "Failed to initialize the pools"); exit(EXIT_FAILURE); diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0e..6f75c54564176 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -159,6 +159,10 @@ FIXTURE_TEARDOWN(hmm) { int ret = close(self->fd); + if (ret != 0) { + fprintf(stderr, "close returned (%d) fd is (%d)\n", ret, self->fd); + exit(1); + } ASSERT_EQ(ret, 0); self->fd = -1; }