diff --git a/COPYING-6.12.0-55.32.1.el10_0 b/COPYING-6.12.0-55.37.1.el10_0 similarity index 100% rename from COPYING-6.12.0-55.32.1.el10_0 rename to COPYING-6.12.0-55.37.1.el10_0 diff --git a/Makefile.rhelver b/Makefile.rhelver index 6af39a99f9248..d43acfb732b56 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 0 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 55.32.1 +RHEL_RELEASE = 55.37.1 # # RHEL_REBASE_NUM diff --git a/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/c353e898.failed b/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/c353e898.failed new file mode 100644 index 0000000000000..e29f082e3cb5c --- /dev/null +++ b/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/c353e898.failed @@ -0,0 +1,306 @@ +net: introduce per netns packet chains + +jira LE-4297 +Rebuild_History Non-Buildable kernel-6.12.0-55.34.1.el10_0 +commit-author Paolo Abeni +commit c353e8983e0dea5dbba7789033326e1ad34135b7 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/c353e898.failed + +Currently network taps unbound to any interface are linked in the +global ptype_all list, affecting the performance in all the network +namespaces. + +Add per netns ptypes chains, so that in the mentioned case only +the netns owning the packet socket(s) is affected. + +While at that drop the global ptype_all list: no in kernel user +registers a tap on "any" type without specifying either the target +device or the target namespace (and IMHO doing that would not make +any sense). + +Note that this adds a conditional in the fast path (to check for +per netns ptype_specific list) and increases the dataset size by +a cacheline (owing the per netns lists). 
+ + Reviewed-by: Sabrina Dubroca + Signed-off-by: Paolo Abeni + Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/ae405f98875ee87f8150c460ad162de7e466f8a7.1742494826.git.pabeni@redhat.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit c353e8983e0dea5dbba7789033326e1ad34135b7) + Signed-off-by: Jonathan Maple + +# Conflicts: +# net/core/net_namespace.c +diff --cc net/core/net_namespace.c +index 70fea7c1a4b0,b0dfdf791ece..000000000000 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@@ -334,6 -334,14 +334,17 @@@ static __net_init void preinit_net(stru + idr_init(&net->netns_ids); + spin_lock_init(&net->nsid_lock); + mutex_init(&net->ipv4.ra_mutex); +++<<<<<<< HEAD +++======= ++ ++ #ifdef CONFIG_DEBUG_NET_SMALL_RTNL ++ mutex_init(&net->rtnl_mutex); ++ lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); ++ #endif ++ ++ INIT_LIST_HEAD(&net->ptype_all); ++ INIT_LIST_HEAD(&net->ptype_specific); +++>>>>>>> c353e8983e0d (net: introduce per netns packet chains) + preinit_net_sysctl(net); + } + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 46f144c3aa39..fc78f7ca0aa5 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -4035,7 +4035,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, + return 0; + } + +-bool dev_nit_active(struct net_device *dev); ++bool dev_nit_active_rcu(const struct net_device *dev); ++static inline bool dev_nit_active(const struct net_device *dev) ++{ ++ bool ret; ++ ++ rcu_read_lock(); ++ ret = dev_nit_active_rcu(dev); ++ rcu_read_unlock(); ++ return ret; ++} ++ + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); + + static inline void __dev_put(struct net_device *dev) +diff --git a/include/net/hotdata.h b/include/net/hotdata.h +index 30e9570beb2a..fda94b2647ff 100644 +--- a/include/net/hotdata.h ++++ b/include/net/hotdata.h +@@ -23,7 +23,6 @@ struct net_hotdata { + struct net_offload udpv6_offload; + #endif + struct list_head offload_base; +- struct list_head ptype_all; + struct kmem_cache *skbuff_cache; + struct kmem_cache *skbuff_fclone_cache; + struct kmem_cache *skb_small_head_cache; +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index fffc4b5b50b1..fa814f6a6436 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -83,6 +83,9 @@ struct net { + struct llist_node defer_free_list; + struct llist_node cleanup_list; /* namespaces on death row */ + ++ struct list_head ptype_all; ++ struct list_head ptype_specific; ++ + #ifdef CONFIG_KEYS + struct key_tag *key_domain; /* Key domain of operation tag */ + #endif +diff --git a/net/core/dev.c b/net/core/dev.c +index 556b8e9eadab..c4ce77e3e29a 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -570,10 +570,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) + + static inline struct list_head *ptype_head(const struct packet_type *pt) + { +- if (pt->type == htons(ETH_P_ALL)) +- return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all; +- else +- return pt->dev ? &pt->dev->ptype_specific : ++ if (pt->type == htons(ETH_P_ALL)) { ++ if (!pt->af_packet_net && !pt->dev) ++ return NULL; ++ ++ return pt->dev ? &pt->dev->ptype_all : ++ &pt->af_packet_net->ptype_all; ++ } ++ ++ if (pt->dev) ++ return &pt->dev->ptype_specific; ++ ++ return pt->af_packet_net ? 
&pt->af_packet_net->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + } + +@@ -594,6 +602,9 @@ void dev_add_pack(struct packet_type *pt) + { + struct list_head *head = ptype_head(pt); + ++ if (WARN_ON_ONCE(!head)) ++ return; ++ + spin_lock(&ptype_lock); + list_add_rcu(&pt->list, head); + spin_unlock(&ptype_lock); +@@ -618,6 +629,9 @@ void __dev_remove_pack(struct packet_type *pt) + struct list_head *head = ptype_head(pt); + struct packet_type *pt1; + ++ if (!head) ++ return; ++ + spin_lock(&ptype_lock); + + list_for_each_entry(pt1, head, list) { +@@ -2271,16 +2285,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) + } + + /** +- * dev_nit_active - return true if any network interface taps are in use ++ * dev_nit_active_rcu - return true if any network interface taps are in use ++ * ++ * The caller must hold the RCU lock + * + * @dev: network device to check for the presence of taps + */ +-bool dev_nit_active(struct net_device *dev) ++bool dev_nit_active_rcu(const struct net_device *dev) + { +- return !list_empty(&net_hotdata.ptype_all) || ++ /* Callers may hold either RCU or RCU BH lock */ ++ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); ++ ++ return !list_empty(&dev_net(dev)->ptype_all) || + !list_empty(&dev->ptype_all); + } +-EXPORT_SYMBOL_GPL(dev_nit_active); ++EXPORT_SYMBOL_GPL(dev_nit_active_rcu); + + /* + * Support routine. Sends outgoing frames to any network +@@ -2289,11 +2308,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active); + + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) + { +- struct list_head *ptype_list = &net_hotdata.ptype_all; + struct packet_type *ptype, *pt_prev = NULL; ++ struct list_head *ptype_list; + struct sk_buff *skb2 = NULL; + + rcu_read_lock(); ++ ptype_list = &dev_net_rcu(dev)->ptype_all; + again: + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (READ_ONCE(ptype->ignore_outgoing)) +@@ -2337,7 +2357,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) + pt_prev = ptype; + } + +- if (ptype_list == &net_hotdata.ptype_all) { ++ if (ptype_list != &dev->ptype_all) { + ptype_list = &dev->ptype_all; + goto again; + } +@@ -3580,7 +3600,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, + unsigned int len; + int rc; + +- if (dev_nit_active(dev)) ++ if (dev_nit_active_rcu(dev)) + dev_queue_xmit_nit(skb, dev); + + len = skb->len; +@@ -5514,7 +5534,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, + if (pfmemalloc) + goto skip_taps; + +- list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) { ++ list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all, ++ list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; +@@ -5626,6 +5647,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); ++ ++ /* orig_dev and skb->dev could belong to different netns; ++ * Even in such case we need to traverse only the list ++ * coming from skb->dev, as the ptype owner (packet socket) ++ * will use dev_net(skb->dev) to do namespace filtering. 
++ */ ++ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, ++ &dev_net_rcu(skb->dev)->ptype_specific); + } + + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, +diff --git a/net/core/hotdata.c b/net/core/hotdata.c +index d0aaaaa556f2..0bc893d5f07b 100644 +--- a/net/core/hotdata.c ++++ b/net/core/hotdata.c +@@ -7,7 +7,6 @@ + + struct net_hotdata net_hotdata __cacheline_aligned = { + .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), +- .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all), + .gro_normal_batch = 8, + + .netdev_budget = 300, +diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c +index fa6d3969734a..3e92bf0f9060 100644 +--- a/net/core/net-procfs.c ++++ b/net/core/net-procfs.c +@@ -185,7 +185,13 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos) + } + } + +- list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { ++ list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { ++ if (i == pos) ++ return pt; ++ ++i; ++ } ++ ++ list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) { + if (i == pos) + return pt; + ++i; +@@ -210,6 +216,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + + static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ struct net *net = seq_file_net(seq); + struct net_device *dev; + struct packet_type *pt; + struct list_head *nxt; +@@ -232,15 +239,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) + goto found; + } + } +- +- nxt = net_hotdata.ptype_all.next; +- goto ptype_all; ++ nxt = net->ptype_all.next; ++ goto net_ptype_all; + } + +- if (pt->type == htons(ETH_P_ALL)) { +-ptype_all: +- if (nxt != &net_hotdata.ptype_all) ++ if (pt->af_packet_net) { ++net_ptype_all: ++ if (nxt != &net->ptype_all && nxt != &net->ptype_specific) + goto found; ++ ++ if (nxt == &net->ptype_all) { ++ /* continue with ->ptype_specific if it's not empty */ ++ nxt = net->ptype_specific.next; ++ if (nxt != &net->ptype_specific) ++ goto found; ++ } ++ + hash = 0; + nxt = ptype_base[0].next; + } else +* Unmerged path net/core/net_namespace.c diff --git a/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/rebuild.details.txt b/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/rebuild.details.txt new file mode 100644 index 0000000000000..3552151586c56 --- /dev/null +++ b/ciq/ciq_backports/kernel-6.12.0-55.34.1.el10_0/rebuild.details.txt @@ -0,0 +1,22 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v6.12~1..kernel-mainline: 66177 +Number of commits in rpm: 19 +Number of commits matched with upstream: 14 (73.68%) +Number of commits in upstream but not in rpm: 66163 +Number of commits NOT found in upstream: 5 (26.32%) + +Rebuilding Kernel on Branch rocky10_0_rebuild_kernel-6.12.0-55.34.1.el10_0 for kernel-6.12.0-55.34.1.el10_0 +Clean Cherry Picks: 13 (92.86%) +Empty Cherry Picks: 1 (7.14%) +_______________________________ + +__EMPTY COMMITS__________________________ +c353e8983e0dea5dbba7789033326e1ad34135b7 net: introduce per netns packet chains + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 10, debranding and Rocky Linux branding' +Add partial riscv64 support for build root' +Provide basic VisionFive 2 support' +redhat: selftests/bpf: Add cpuv4 variant +net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM JIRA: https://issues.redhat.com/browse/RHEL-109821 Y-JIRA: https://issues.redhat.com/browse/RHEL-79173 diff --git 
a/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/a61a3e96.failed b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/a61a3e96.failed new file mode 100644 index 0000000000000..45f7c032f851a --- /dev/null +++ b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/a61a3e96.failed @@ -0,0 +1,810 @@ +selftests: tls: add tests for zero-length records + +jira LE-4297 +cve CVE-2025-39682 +Rebuild_History Non-Buildable kernel-6.12.0-55.37.1.el10_0 +commit-author Jakub Kicinski +commit a61a3e961baff65b0a49f862fe21ce304f279b24 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/a61a3e96.failed + +Test various combinations of zero-length records. +Unfortunately, kernel cannot be coerced into producing those, +so hardcode the ciphertext messages in the test. + + Reviewed-by: Sabrina Dubroca +Link: https://patch.msgid.link/20250820021952.143068-2-kuba@kernel.org + Signed-off-by: Jakub Kicinski +(cherry picked from commit a61a3e961baff65b0a49f862fe21ce304f279b24) + Signed-off-by: Jonathan Maple + +# Conflicts: +# tools/testing/selftests/net/tls.c +diff --cc tools/testing/selftests/net/tls.c +index 1a706d03bb6b,0f5640d8dc7f..000000000000 +--- a/tools/testing/selftests/net/tls.c ++++ b/tools/testing/selftests/net/tls.c +@@@ -1668,6 -1682,778 +1680,781 @@@ TEST_F(tls, recv_efault + EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); + } + +++<<<<<<< HEAD +++======= ++ #define TLS_RECORD_TYPE_HANDSHAKE 0x16 ++ /* key_update, length 1, update_not_requested */ ++ static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; ++ static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) ++ { ++ size_t len = sizeof(key_update_msg); ++ ++ EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, ++ (char *)key_update_msg, len, 0), ++ len); ++ } ++ ++ static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) ++ { ++ char buf[100]; ++ ++ EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), ++ sizeof(key_update_msg)); ++ EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); ++ } ++ ++ /* set the key to 0 then 1 for RX, immediately to 1 for TX */ ++ TEST_F(tls_basic, rekey_rx) ++ { ++ struct tls_crypto_info_keys tls12_0, tls12_1; ++ char const *test_str = "test_message"; ++ int send_len = strlen(test_str) + 1; ++ char buf[20]; ++ int ret; ++ ++ if (self->notls) ++ return; ++ ++ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, ++ &tls12_0, 0); ++ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, ++ &tls12_1, 1); ++ ++ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); ++ ASSERT_EQ(ret, 0); ++ ++ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); ++ ASSERT_EQ(ret, 0); ++ ++ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); ++ EXPECT_EQ(ret, 0); ++ ++ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ EXPECT_EQ(memcmp(buf, test_str, send_len), 0); ++ } ++ ++ /* set the key to 0 then 1 for TX, immediately to 1 for RX */ ++ TEST_F(tls_basic, rekey_tx) ++ { ++ struct tls_crypto_info_keys tls12_0, tls12_1; ++ char const *test_str = "test_message"; ++ int send_len = strlen(test_str) + 1; ++ char buf[20]; ++ int ret; ++ ++ if (self->notls) ++ return; ++ ++ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, ++ &tls12_0, 0); ++ tls_crypto_info_init(TLS_1_3_VERSION, 
TLS_CIPHER_AES_GCM_128, ++ &tls12_1, 1); ++ ++ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); ++ ASSERT_EQ(ret, 0); ++ ++ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); ++ ASSERT_EQ(ret, 0); ++ ++ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); ++ EXPECT_EQ(ret, 0); ++ ++ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ EXPECT_EQ(memcmp(buf, test_str, send_len), 0); ++ } ++ ++ TEST_F(tls_basic, disconnect) ++ { ++ char const *test_str = "test_message"; ++ int send_len = strlen(test_str) + 1; ++ struct tls_crypto_info_keys key; ++ struct sockaddr_in addr; ++ char buf[20]; ++ int ret; ++ ++ if (self->notls) ++ return; ++ ++ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, ++ &key, 0); ++ ++ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); ++ ASSERT_EQ(ret, 0); ++ ++ /* Pre-queue the data so that setsockopt parses it but doesn't ++ * dequeue it from the TCP socket. recvmsg would dequeue. ++ */ ++ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); ++ ++ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); ++ ASSERT_EQ(ret, 0); ++ ++ addr.sin_family = AF_UNSPEC; ++ addr.sin_addr.s_addr = htonl(INADDR_ANY); ++ addr.sin_port = 0; ++ ret = connect(self->cfd, &addr, sizeof(addr)); ++ EXPECT_EQ(ret, -1); ++ EXPECT_EQ(errno, EOPNOTSUPP); ++ ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ } ++ ++ TEST_F(tls, rekey) ++ { ++ char const *test_str_1 = "test_message_before_rekey"; ++ char const *test_str_2 = "test_message_after_rekey"; ++ struct tls_crypto_info_keys tls12; ++ int send_len; ++ char buf[100]; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ /* initial send/recv */ ++ send_len = strlen(test_str_1) + 1; ++ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ /* send after rekey */ ++ send_len = strlen(test_str_2) + 1; ++ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); ++ ++ /* can't receive the KeyUpdate without a control message */ ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ /* recv blocking -> -EKEYEXPIRED */ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); ++ EXPECT_EQ(errno, EKEYEXPIRED); ++ ++ /* recv non-blocking -> -EKEYEXPIRED */ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); ++ EXPECT_EQ(errno, EKEYEXPIRED); ++ ++ /* update RX key */ ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ /* recv after rekey */ ++ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); ++ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); ++ } ++ ++ TEST_F(tls, rekey_fail) ++ { ++ char const *test_str_1 = "test_message_before_rekey"; ++ char const *test_str_2 = "test_message_after_rekey"; ++ struct tls_crypto_info_keys tls12; ++ int send_len; ++ char buf[100]; ++ ++ /* initial send/recv */ ++ send_len = strlen(test_str_1) + 1; ++ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); ++ ++ 
/* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ ++ if (variant->tls_version != TLS_1_3_VERSION) { ++ /* just check that rekey is not supported and return */ ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); ++ EXPECT_EQ(errno, EBUSY); ++ return; ++ } ++ ++ /* successful update */ ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ /* invalid update: change of version */ ++ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); ++ EXPECT_EQ(errno, EINVAL); ++ ++ /* invalid update (RX socket): change of version */ ++ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); ++ EXPECT_EQ(errno, EINVAL); ++ ++ /* invalid update: change of cipher */ ++ if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) ++ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); ++ else ++ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); ++ EXPECT_EQ(errno, EINVAL); ++ ++ /* send after rekey, the invalid updates shouldn't have an effect */ ++ send_len = strlen(test_str_2) + 1; ++ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); ++ ++ /* can't receive the KeyUpdate without a control message */ ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ /* recv blocking -> -EKEYEXPIRED */ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); ++ EXPECT_EQ(errno, EKEYEXPIRED); ++ ++ /* recv non-blocking -> -EKEYEXPIRED */ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); ++ EXPECT_EQ(errno, EKEYEXPIRED); ++ ++ /* update RX key */ ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ /* recv after rekey */ ++ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); ++ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); ++ } ++ ++ TEST_F(tls, rekey_peek) ++ { ++ char const *test_str_1 = "test_message_before_rekey"; ++ struct tls_crypto_info_keys tls12; ++ int send_len; ++ char buf[100]; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ send_len = strlen(test_str_1) + 1; ++ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); ++ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); ++ ++ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); ++ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); ++ ++ /* can't receive the KeyUpdate without a control message */ ++ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); ++ ++ /* peek KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ /* update RX key */ ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, 
tls12.len), 0); ++ } ++ ++ TEST_F(tls, splice_rekey) ++ { ++ int send_len = TLS_PAYLOAD_MAX_LEN / 2; ++ char mem_send[TLS_PAYLOAD_MAX_LEN]; ++ char mem_recv[TLS_PAYLOAD_MAX_LEN]; ++ struct tls_crypto_info_keys tls12; ++ int p[2]; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ memrnd(mem_send, sizeof(mem_send)); ++ ++ ASSERT_GE(pipe(p), 0); ++ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); ++ ++ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); ++ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); ++ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); ++ ++ /* can't splice the KeyUpdate */ ++ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); ++ EXPECT_EQ(errno, EINVAL); ++ ++ /* peek KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ /* can't splice before updating the key */ ++ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); ++ EXPECT_EQ(errno, EKEYEXPIRED); ++ ++ /* update RX key */ ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); ++ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); ++ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); ++ } ++ ++ TEST_F(tls, rekey_peek_splice) ++ { ++ char const *test_str_1 = "test_message_before_rekey"; ++ struct tls_crypto_info_keys tls12; ++ int send_len; ++ char buf[100]; ++ char mem_recv[TLS_PAYLOAD_MAX_LEN]; ++ int p[2]; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ ASSERT_GE(pipe(p), 0); ++ ++ send_len = strlen(test_str_1) + 1; ++ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); ++ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); ++ ++ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); ++ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); ++ EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); ++ } ++ ++ TEST_F(tls, rekey_getsockopt) ++ { ++ struct tls_crypto_info_keys tls12; ++ struct tls_crypto_info_keys tls12_get; ++ socklen_t len; ++ ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); ++ ++ len = tls12.len; ++ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); ++ EXPECT_EQ(len, tls12.len); ++ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); ++ ++ len = tls12.len; ++ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); ++ EXPECT_EQ(len, tls12.len); ++ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ tls_recv_keyupdate(_metadata, 
self->cfd, 0); ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ len = tls12.len; ++ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); ++ EXPECT_EQ(len, tls12.len); ++ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); ++ ++ len = tls12.len; ++ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); ++ EXPECT_EQ(len, tls12.len); ++ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); ++ } ++ ++ TEST_F(tls, rekey_poll_pending) ++ { ++ char const *test_str = "test_message_after_rekey"; ++ struct tls_crypto_info_keys tls12; ++ struct pollfd pfd = { }; ++ int send_len; ++ int ret; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ /* send immediately after rekey */ ++ send_len = strlen(test_str) + 1; ++ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); ++ ++ /* key hasn't been updated, expect cfd to be non-readable */ ++ pfd.fd = self->cfd; ++ pfd.events = POLLIN; ++ EXPECT_EQ(poll(&pfd, 1, 0), 0); ++ ++ ret = fork(); ++ ASSERT_GE(ret, 0); ++ ++ if (ret) { ++ int pid2, status; ++ ++ /* wait before installing the new key */ ++ sleep(1); ++ ++ /* update RX key while poll() is sleeping */ ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ pid2 = wait(&status); ++ EXPECT_EQ(pid2, ret); ++ EXPECT_EQ(status, 0); ++ } else { ++ pfd.fd = self->cfd; ++ pfd.events = POLLIN; ++ EXPECT_EQ(poll(&pfd, 1, 5000), 1); ++ ++ exit(!__test_passed(_metadata)); ++ } ++ } ++ ++ TEST_F(tls, rekey_poll_delay) ++ { ++ char const *test_str = "test_message_after_rekey"; ++ struct tls_crypto_info_keys tls12; ++ struct pollfd pfd = { }; ++ int send_len; ++ int ret; ++ ++ if (variant->tls_version != TLS_1_3_VERSION) ++ return; ++ ++ /* update TX key */ ++ tls_send_keyupdate(_metadata, self->fd); ++ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); ++ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); ++ ++ /* get KeyUpdate */ ++ tls_recv_keyupdate(_metadata, self->cfd, 0); ++ ++ ret = fork(); ++ ASSERT_GE(ret, 0); ++ ++ if (ret) { ++ int pid2, status; ++ ++ /* wait before installing the new key */ ++ sleep(1); ++ ++ /* update RX key while poll() is sleeping */ ++ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); ++ ++ sleep(1); ++ send_len = strlen(test_str) + 1; ++ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); ++ ++ pid2 = wait(&status); ++ EXPECT_EQ(pid2, ret); ++ EXPECT_EQ(status, 0); ++ } else { ++ pfd.fd = self->cfd; ++ pfd.events = POLLIN; ++ EXPECT_EQ(poll(&pfd, 1, 5000), 1); ++ exit(!__test_passed(_metadata)); ++ } ++ } ++ ++ struct raw_rec { ++ unsigned int plain_len; ++ unsigned char plain_data[100]; ++ unsigned int cipher_len; ++ unsigned char cipher_data[128]; ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ ++ static const struct raw_rec id0_data_l11 = { ++ .plain_len = 11, ++ .plain_data = { ++ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, ++ 0x72, 0x6c, 0x64, ++ }, ++ .cipher_len = 40, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, ++ 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, ++ 0x6a, 0x75, 0xb2, 
0x93, 0xad, 0x45, 0xd5, 0xfd, ++ 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ ++ static const struct raw_rec id0_ctrl_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, ++ 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, ++ 0x88, 0xe1, 0xd2, 0x08, 0x4f, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ ++ static const struct raw_rec id0_data_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, ++ 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, ++ 0x68, 0x80, 0xd3, 0xd8, 0xcc, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ ++ static const struct raw_rec id1_data_l11 = { ++ .plain_len = 11, ++ .plain_data = { ++ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, ++ 0x72, 0x6c, 0x64, ++ }, ++ .cipher_len = 40, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, ++ 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, ++ 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, ++ 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ ++ static const struct raw_rec id1_ctrl_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, ++ 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, ++ 0xb4, 0x7e, 0xef, 0x40, 0x2b, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ ++ static const struct raw_rec id1_data_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, ++ 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, ++ 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ ++ static const struct raw_rec id2_ctrl_l11 = { ++ .plain_len = 11, ++ .plain_data = { ++ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, ++ 0x72, 0x6c, 0x64, ++ }, ++ .cipher_len = 40, ++ .cipher_data = { ++ 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, ++ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, ++ 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, ++ 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ ++ static const struct raw_rec id2_data_l11 = { ++ .plain_len = 11, ++ .plain_data = { ++ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, ++ 0x72, 0x6c, 0x64, ++ }, ++ .cipher_len = 40, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, ++ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, ++ 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, ++ 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ ++ static const struct raw_rec id2_ctrl_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x02, 
0xdc, 0x5c, 0x0e, ++ 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, ++ 0x06, 0xdb, 0x79, 0xe5, 0x5d, ++ }, ++ }; ++ ++ /* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ ++ static const struct raw_rec id2_data_l0 = { ++ .plain_len = 0, ++ .plain_data = { ++ }, ++ .cipher_len = 29, ++ .cipher_data = { ++ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, ++ 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, ++ 0x30, 0x48, 0x69, 0x1a, 0x47, ++ }, ++ }; ++ ++ FIXTURE(zero_len) ++ { ++ int fd, cfd; ++ bool notls; ++ }; ++ ++ FIXTURE_VARIANT(zero_len) ++ { ++ const struct raw_rec *recs[4]; ++ ssize_t recv_ret[4]; ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, data_data_data) ++ { ++ .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, ++ .recv_ret = { 33, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) ++ { ++ .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, ++ .recv_ret = { 11, 0, 11, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) ++ { ++ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, ++ .recv_ret = { -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) ++ { ++ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, ++ .recv_ret = { 0, 11, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) ++ { ++ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, ++ .recv_ret = { 0, 0, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) ++ { ++ .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, ++ .recv_ret = { 0, 0, 0, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) ++ { ++ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, ++ .recv_ret = { 11, -EAGAIN, }, ++ }; ++ ++ FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) ++ { ++ .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, ++ .recv_ret = { 11, -EAGAIN, }, ++ }; ++ ++ FIXTURE_SETUP(zero_len) ++ { ++ struct tls_crypto_info_keys tls12; ++ int ret; ++ ++ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, ++ &tls12, 0); ++ ++ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ++ if (self->notls) ++ return; ++ ++ /* Don't install keys on fd, we'll send raw records */ ++ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ++ ASSERT_EQ(ret, 0); ++ } ++ ++ FIXTURE_TEARDOWN(zero_len) ++ { ++ close(self->fd); ++ close(self->cfd); ++ } ++ ++ TEST_F(zero_len, test) ++ { ++ const struct raw_rec *const *rec; ++ unsigned char buf[128]; ++ int rec_off; ++ int i; ++ ++ for (i = 0; i < 4 && variant->recs[i]; i++) ++ EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, ++ variant->recs[i]->cipher_len, 0), ++ variant->recs[i]->cipher_len); ++ ++ rec = &variant->recs[0]; ++ rec_off = 0; ++ for (i = 0; i < 4; i++) { ++ int j, ret; ++ ++ ret = variant->recv_ret[i] >= 0 ? 
variant->recv_ret[i] : -1; ++ EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, ++ buf, sizeof(buf), MSG_DONTWAIT), ret); ++ if (ret == -1) ++ EXPECT_EQ(errno, -variant->recv_ret[i]); ++ if (variant->recv_ret[i] == -EAGAIN) ++ break; ++ ++ for (j = 0; j < ret; j++) { ++ while (rec_off == (*rec)->plain_len) { ++ rec++; ++ rec_off = 0; ++ } ++ EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); ++ rec_off++; ++ } ++ } ++ }; ++ +++>>>>>>> a61a3e961baf (selftests: tls: add tests for zero-length records) + FIXTURE(tls_err) + { + int fd, cfd; +* Unmerged path tools/testing/selftests/net/tls.c diff --git a/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/cbe4134e.failed b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/cbe4134e.failed new file mode 100644 index 0000000000000..ceec9fbe6e34f --- /dev/null +++ b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/cbe4134e.failed @@ -0,0 +1,135 @@ +fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass + +jira LE-4297 +cve CVE-2025-38396 +Rebuild_History Non-Buildable kernel-6.12.0-55.37.1.el10_0 +commit-author Shivank Garg +commit cbe4134ea4bc493239786220bd69cb8a13493190 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/cbe4134e.failed + +Export anon_inode_make_secure_inode() to allow KVM guest_memfd to create +anonymous inodes with proper security context. This replaces the current +pattern of calling alloc_anon_inode() followed by +inode_init_security_anon() for creating security context manually. + +This change also fixes a security regression in secretmem where the +S_PRIVATE flag was not cleared after alloc_anon_inode(), causing +LSM/SELinux checks to be bypassed for secretmem file descriptors. + +As guest_memfd currently resides in the KVM module, we need to export this +symbol for use outside the core kernel. In the future, guest_memfd might be +moved to core-mm, at which point the symbols no longer would have to be +exported. When/if that happens is still unclear. 
+ +Fixes: 2bfe15c52612 ("mm: create security context for memfd_secret inodes") + Suggested-by: David Hildenbrand + Suggested-by: Mike Rapoport + Signed-off-by: Shivank Garg +Link: https://lore.kernel.org/20250620070328.803704-3-shivankg@amd.com + Acked-by: "Mike Rapoport (Microsoft)" + Signed-off-by: Christian Brauner +(cherry picked from commit cbe4134ea4bc493239786220bd69cb8a13493190) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/secretmem.c +diff --cc mm/secretmem.c +index 399552814fd0,9a11a38a6770..000000000000 +--- a/mm/secretmem.c ++++ b/mm/secretmem.c +@@@ -195,19 -195,11 +195,25 @@@ static struct file *secretmem_file_crea + struct file *file; + struct inode *inode; + const char *anon_name = "[secretmem]"; +++<<<<<<< HEAD + + const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); + + int err; +++======= +++>>>>>>> cbe4134ea4bc (fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass) + +- inode = alloc_anon_inode(secretmem_mnt->mnt_sb); ++ inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); + if (IS_ERR(inode)) + return ERR_CAST(inode); + +++<<<<<<< HEAD + + err = security_inode_init_security_anon(inode, &qname, NULL); + + if (err) { + + file = ERR_PTR(err); + + goto err_free_inode; + + } + + +++======= +++>>>>>>> cbe4134ea4bc (fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass) + file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", + O_RDWR, &secretmem_fops); + if (IS_ERR(file)) +diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c +index 42bd1cb7c9cd..0c45281c57d8 100644 +--- a/fs/anon_inodes.c ++++ b/fs/anon_inodes.c +@@ -55,15 +55,26 @@ static struct file_system_type anon_inode_fs_type = { + .kill_sb = kill_anon_super, + }; + +-static struct inode *anon_inode_make_secure_inode( +- const char *name, +- const struct inode *context_inode) ++/** ++ * anon_inode_make_secure_inode - allocate an anonymous inode with security context ++ * @sb: [in] Superblock to allocate from ++ * @name: [in] Name of the class of the newfile (e.g., "secretmem") ++ * @context_inode: ++ * [in] Optional parent inode for security inheritance ++ * ++ * The function ensures proper security initialization through the LSM hook ++ * security_inode_init_security_anon(). ++ * ++ * Return: Pointer to new inode on success, ERR_PTR on failure. 
++ */ ++struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, ++ const struct inode *context_inode) + { + struct inode *inode; + const struct qstr qname = QSTR_INIT(name, strlen(name)); + int error; + +- inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); ++ inode = alloc_anon_inode(sb); + if (IS_ERR(inode)) + return inode; + inode->i_flags &= ~S_PRIVATE; +@@ -74,6 +85,7 @@ static struct inode *anon_inode_make_secure_inode( + } + return inode; + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); + + static struct file *__anon_inode_getfile(const char *name, + const struct file_operations *fops, +@@ -88,7 +100,8 @@ static struct file *__anon_inode_getfile(const char *name, + return ERR_PTR(-ENOENT); + + if (make_inode) { +- inode = anon_inode_make_secure_inode(name, context_inode); ++ inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, ++ name, context_inode); + if (IS_ERR(inode)) { + file = ERR_CAST(inode); + goto err; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 3559446279c1..4d01ec8c0a58 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -3405,6 +3405,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, + extern const struct address_space_operations ram_aops; + extern int always_delete_dentry(const struct dentry *); + extern struct inode *alloc_anon_inode(struct super_block *); ++struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, ++ const struct inode *context_inode); + extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); + extern const struct dentry_operations simple_dentry_operations; + +* Unmerged path mm/secretmem.c diff --git a/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/rebuild.details.txt b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/rebuild.details.txt new file mode 100644 index 0000000000000..9c3e0fc3adcd8 --- /dev/null +++ b/ciq/ciq_backports/kernel-6.12.0-55.37.1.el10_0/rebuild.details.txt @@ -0,0 +1,21 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v6.12~1..kernel-mainline: 66177 +Number of commits in rpm: 25 +Number of commits matched with upstream: 22 (88.00%) +Number of commits in upstream but not in rpm: 66155 +Number of commits NOT found in upstream: 3 (12.00%) + +Rebuilding Kernel on Branch rocky10_0_rebuild_kernel-6.12.0-55.37.1.el10_0 for kernel-6.12.0-55.37.1.el10_0 +Clean Cherry Picks: 20 (90.91%) +Empty Cherry Picks: 2 (9.09%) +_______________________________ + +__EMPTY COMMITS__________________________ +cbe4134ea4bc493239786220bd69cb8a13493190 fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass +a61a3e961baff65b0a49f862fe21ce304f279b24 selftests: tls: add tests for zero-length records + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 10, debranding and Rocky Linux branding' +Add partial riscv64 support for build root' +Provide basic VisionFive 2 support' diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index dc9a48440d2f5..649cdfb5f8607 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1407,17 +1407,27 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } } +/* Must be called when queue is frozen */ +static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) +{ + bool canceled; + + spin_lock(&ubq->cancel_lock); + canceled = ubq->canceling; + if (!canceled) + ubq->canceling = true; + 
spin_unlock(&ubq->cancel_lock); + + return canceled; +} + static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) { + bool was_canceled = ubq->canceling; struct gendisk *disk; - spin_lock(&ubq->cancel_lock); - if (ubq->canceling) { - spin_unlock(&ubq->cancel_lock); + if (was_canceled) return false; - } - ubq->canceling = true; - spin_unlock(&ubq->cancel_lock); spin_lock(&ub->lock); disk = ub->ub_disk; @@ -1429,14 +1439,23 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) if (!disk) return false; - /* Now we are serialized with ublk_queue_rq() */ + /* + * Now we are serialized with ublk_queue_rq() + * + * Make sure that ubq->canceling is set when queue is frozen, + * because ublk_queue_rq() has to rely on this flag for avoiding to + * touch completed uring_cmd + */ blk_mq_quiesce_queue(disk->queue); - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); + was_canceled = ublk_mark_queue_canceling(ubq); + if (!was_canceled) { + /* abort queue is for making forward progress */ + ublk_abort_queue(ub, ubq); + } blk_mq_unquiesce_queue(disk->queue); put_device(disk_to_dev(disk)); - return true; + return !was_canceled; } static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1b87b600197de..c7179886ed75f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -758,7 +758,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { - txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) /* set SKB to err so PTP worker will clean up */ @@ -766,7 +766,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); - txr->tx_buf_ring[txr->tx_prod].skb = NULL; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].skb = NULL; dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb670..bbeb93d84bc92 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3137,10 +3137,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) const u8 *addr = al->list[i].addr; /* Allow to delete VF primary MAC only if it was not set - * administratively by PF or if VF is trusted. + * administratively by PF. 
*/ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { - if (i40e_can_vf_change_mac(vf)) + if (!vf->pf_set_mac) was_unimac_deleted = true; else continue; @@ -5003,7 +5003,7 @@ int i40e_get_vf_stats(struct net_device *netdev, int vf_id, vf_stats->broadcast = stats->rx_broadcast; vf_stats->multicast = stats->rx_multicast; vf_stats->rx_dropped = stats->rx_discards + stats->rx_discards_other; - vf_stats->tx_dropped = stats->tx_discards; + vf_stats->tx_dropped = stats->tx_errors; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090a..10285995c9edd 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,45 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) +#define ICE_ADAPTER_FIXED_INDEX BIT_ULL(63) -static unsigned long ice_adapter_index(const struct pci_dev *pdev) -{ - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); +#define ICE_ADAPTER_INDEX_E825C \ + (ICE_DEV_ID_E825C_BACKPLANE | ICE_ADAPTER_FIXED_INDEX) +static u64 ice_adapter_index(struct pci_dev *pdev) +{ switch (pdev->device) { case ICE_DEV_ID_E825C_BACKPLANE: case ICE_DEV_ID_E825C_QSFP: case ICE_DEV_ID_E825C_SFP: case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); + /* E825C devices have multiple NACs which are connected to the + * same clock source, and which must share the same + * ice_adapter structure. We can't use the serial number since + * each NAC has its own NVM generated with its own unique + * Device Serial Number. Instead, rely on the embedded nature + * of the E825C devices, and use a fixed index. This relies on + * the fact that all E825C physical functions in a given + * system are part of the same overall device. + */ + return ICE_ADAPTER_INDEX_E825C; default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); + return pci_get_dsn(pdev) & ~ICE_ADAPTER_FIXED_INDEX; } } -static struct ice_adapter *ice_adapter_new(void) +static unsigned long ice_adapter_xa_index(struct pci_dev *pdev) +{ + u64 index = ice_adapter_index(pdev); + +#if BITS_PER_LONG == 64 + return index; +#else + return (u32)index ^ (u32)(index >> 32); +#endif +} + +static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev) { struct ice_adapter *adapter; @@ -47,6 +59,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->index = ice_adapter_index(pdev); spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +90,25 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. 
*/ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->index != ice_adapter_index(pdev)); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(pdev); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +125,12 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b38..409467847c753 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @index: 64-bit index cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 index; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index 4849590a5591f..f29f94a1d7f2b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) */ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) { - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* free ring buffers and the ring itself */ idpf_ctlq_dealloc_ring_res(hw, cq); @@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) /* Set ring_size to 0 to indicate uninitialized queue */ cq->ring_size = 0; - mutex_unlock(&cq->cq_lock); - mutex_destroy(&cq->cq_lock); + spin_unlock(&cq->cq_lock); } /** @@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw, idpf_ctlq_init_regs(hw, cq, is_rxq); - mutex_init(&cq->cq_lock); + spin_lock_init(&cq->cq_lock); list_add(&cq->cq_list, &hw->cq_list_head); @@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, int err = 0; int i; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* Ensure there are enough descriptors to send all messages */ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); @@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, cq->reg.tail, cq->next_to_use); err_unlock: - 
mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); return err; } @@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (*clean_count > cq->ring_size) return -EBADR; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -394,7 +393,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* Return number of descriptors actually cleaned */ *clean_count = i; @@ -432,7 +431,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, if (*buff_count > 0) buffs_avail = true; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); if (tbp >= cq->ring_size) tbp = 0; @@ -521,7 +520,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, cq->reg.tail, cq->next_to_post); } - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* return the number of buffers that were not posted */ *buff_count = *buff_count - i; @@ -549,7 +548,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, u16 i; /* take the lock before we start messing with the ring */ - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -608,7 +607,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); *num_q_msg = i; if (*num_q_msg == 0) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index e8e046ef2f0d7..5890d8adca4a8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -99,7 +99,7 @@ struct idpf_ctlq_info { enum idpf_ctlq_type cq_type; int q_id; - struct mutex cq_lock; /* control queue lock */ + spinlock_t cq_lock; /* control queue lock */ /* used for interrupt processing */ u16 next_to_use; u16 next_to_clean; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index a3d6b8f198a86..5393d05402fc9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2321,8 +2321,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) struct idpf_adapter *adapter = hw->back; size_t sz = ALIGN(size, 4096); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, - &mem->pa, GFP_KERNEL); + /* The control queue resources are freed under a spinlock, contiguous + * pages will avoid IOMMU remapping and the use vmap (and vunmap in + * dma_free_*() path. 
+ */ + mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa, + GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = sz; return mem->va; @@ -2337,8 +2341,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) { struct idpf_adapter *adapter = hw->back; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, mem->pa); + dma_free_attrs(&adapter->pdev->dev, mem->size, + mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = 0; mem->va = NULL; mem->pa = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7bf275f127c9d..dffe27cca025f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4217,8 +4217,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_txq_stats *txq_stats; struct stmmac_tx_queue *tx_q; u32 pay_len, mss, queue; + dma_addr_t tso_des, des; u8 proto_hdr_len, hdr; - dma_addr_t des; bool set_ic; int i; @@ -4314,14 +4314,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* If needed take extra descriptors to fill the remaining payload */ tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; + tso_des = des; } else { stmmac_set_desc_addr(priv, first, des); tmp_pay_len = pay_len; - des += proto_hdr_len; + tso_des = des + proto_hdr_len; pay_len = 0; } - stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); + stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue); /* In case two or more DMA transmit descriptors are allocated for this * non-paged SKB data, the DMA buffer address should be saved to diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index fbffd451031fd..9c2abf39dfde8 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -76,6 +76,13 @@ unsigned long sclp_console_full; /* The currently active SCLP command word. 
*/ static sclp_cmdw_t active_cmd; +static inline struct sccb_header *sclpint_to_sccb(u32 sccb_int) +{ + if (sccb_int) + return __va(sccb_int); + return NULL; +} + static inline void sclp_trace(int prio, char *id, u32 a, u64 b, bool err) { struct sclp_trace_entry e; @@ -620,7 +627,7 @@ __sclp_find_req(u32 sccb) static bool ok_response(u32 sccb_int, sclp_cmdw_t cmd) { - struct sccb_header *sccb = (struct sccb_header *)__va(sccb_int); + struct sccb_header *sccb = sclpint_to_sccb(sccb_int); struct evbuf_header *evbuf; u16 response; @@ -659,7 +666,7 @@ static void sclp_interrupt_handler(struct ext_code ext_code, /* INT: Interrupt received (a=intparm, b=cmd) */ sclp_trace_sccb(0, "INT", param32, active_cmd, active_cmd, - (struct sccb_header *)__va(finished_sccb), + sclpint_to_sccb(finished_sccb), !ok_response(finished_sccb, active_cmd)); if (finished_sccb) { diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 874644b31a3eb..be7b016383c68 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6004,9 +6004,9 @@ lpfc_sli4_get_ctl_attr(struct lpfc_hba *phba) phba->sli4_hba.flash_id = bf_get(lpfc_cntl_attr_flash_id, cntl_attr); phba->sli4_hba.asic_rev = bf_get(lpfc_cntl_attr_asic_rev, cntl_attr); - memset(phba->BIOSVersion, 0, sizeof(phba->BIOSVersion)); - strlcat(phba->BIOSVersion, (char *)cntl_attr->bios_ver_str, + memcpy(phba->BIOSVersion, cntl_attr->bios_ver_str, sizeof(phba->BIOSVersion)); + phba->BIOSVersion[sizeof(phba->BIOSVersion) - 1] = '\0'; lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "3086 lnk_type:%d, lnk_numb:%d, bios_ver:%s, " diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4959c26d3b71b..8a5cd8c51e0b2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4489,6 +4489,12 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) if (!count) return IRQ_NONE; + if (count > evt->length) { + dev_err_ratelimited(dwc->dev, "invalid count(%u) > evt->length(%u)\n", + count, evt->length); + return IRQ_NONE; + } + evt->count = count; evt->flags |= DWC3_EVENT_PENDING; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 42bd1cb7c9cdd..5a070be69922a 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -55,15 +55,26 @@ static struct file_system_type anon_inode_fs_type = { .kill_sb = kill_anon_super, }; -static struct inode *anon_inode_make_secure_inode( - const char *name, - const struct inode *context_inode) +/** + * anon_inode_make_secure_inode - allocate an anonymous inode with security context + * @sb: [in] Superblock to allocate from + * @name: [in] Name of the class of the newfile (e.g., "secretmem") + * @context_inode: + * [in] Optional parent inode for security inheritance + * + * The function ensures proper security initialization through the LSM hook + * security_inode_init_security_anon(). + * + * Return: Pointer to new inode on success, ERR_PTR on failure. 
+ */ +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode) { struct inode *inode; const struct qstr qname = QSTR_INIT(name, strlen(name)); int error; - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -74,6 +85,7 @@ static struct inode *anon_inode_make_secure_inode( } return inode; } +EXPORT_SYMBOL_GPL(anon_inode_make_secure_inode); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, @@ -88,7 +100,8 @@ static struct file *__anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); if (make_inode) { - inode = anon_inode_make_secure_inode(name, context_inode); + inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, + name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; diff --git a/fs/namespace.c b/fs/namespace.c index d26f5e6d2ca35..da767032a0a15 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2673,6 +2673,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) return attach_recursive_mnt(mnt, p, mp, 0); } +static int may_change_propagation(const struct mount *m) +{ + struct mnt_namespace *ns = m->mnt_ns; + + // it must be mounted in some namespace + if (IS_ERR_OR_NULL(ns)) // is_mounted() + return -EINVAL; + // and the caller must be admin in userns of that namespace + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + /* * Sanity check the flags to change_mnt_propagation. */ @@ -2709,6 +2722,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + err = may_change_propagation(mnt); + if (err) + goto out_unlock; + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3102,18 +3119,11 @@ static int do_set_group(struct path *from_path, struct path *to_path) namespace_lock(); - err = -EINVAL; - /* To and From must be mounted */ - if (!is_mounted(&from->mnt)) - goto out; - if (!is_mounted(&to->mnt)) - goto out; - - err = -EPERM; - /* We should be allowed to modify mount namespaces of both mounts */ - if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(from); + if (err) goto out; - if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(to); + if (err) goto out; err = -EINVAL; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index e03c890de0a06..c0196be0e65fc 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -362,6 +362,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) c = 0; spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { +#ifdef CONFIG_CIFS_SMB_DIRECT + struct smbdirect_socket_parameters *sp; +#endif + /* channel info will be printed as a part of sessions below */ if (SERVER_IS_CHAN(server)) continue; @@ -383,25 +387,26 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect transport not available"); goto skip_rdma; } + sp = &server->smbd_conn->socket.parameters; seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", server->smbd_conn->protocol, - server->smbd_conn->transport_status); + server->smbd_conn->socket.status); seq_printf(m, "\nConn receive_credit_max: %x " "send_credit_target: %x max_send_size: %x", - server->smbd_conn->receive_credit_max, - 
server->smbd_conn->send_credit_target, - server->smbd_conn->max_send_size); + sp->recv_credit_max, + sp->send_credit_target, + sp->max_send_size); seq_printf(m, "\nConn max_fragmented_recv_size: %x " "max_fragmented_send_size: %x max_receive_size:%x", - server->smbd_conn->max_fragmented_recv_size, - server->smbd_conn->max_fragmented_send_size, - server->smbd_conn->max_receive_size); + sp->max_fragmented_recv_size, + sp->max_fragmented_send_size, + sp->max_recv_size); seq_printf(m, "\nConn keep_alive_interval: %x " "max_readwrite_size: %x rdma_readwrite_threshold: %x", - server->smbd_conn->keep_alive_interval, - server->smbd_conn->max_readwrite_size, + sp->keepalive_interval_msec * 1000, + sp->max_read_write_size, server->smbd_conn->rdma_readwrite_threshold); seq_printf(m, "\nDebug count_get_receive_buffer: %x " "count_put_receive_buffer: %x count_send_empty: %x", diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3b2d33291a7e6..f28ceb6c73a3e 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -3051,7 +3051,8 @@ void cifs_oplock_break(struct work_struct *work) struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, oplock_break); struct inode *inode = d_inode(cfile->dentry); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_tcon *tcon; struct TCP_Server_Info *server; @@ -3061,6 +3062,12 @@ void cifs_oplock_break(struct work_struct *work) __u64 persistent_fid, volatile_fid; __u16 net_fid; + /* + * Hold a reference to the superblock to prevent it and its inodes from + * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put() + * may release the last reference to the sb and trigger inode eviction. 
+ */ + cifs_sb_active(sb); wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); @@ -3133,6 +3140,7 @@ void cifs_oplock_break(struct work_struct *work) cifs_put_tlink(tlink); out: cifs_done_oplock_break(cinode); + cifs_sb_deactive(sb); } static int cifs_swap_activate(struct swap_info_struct *sis, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7f90d2401749c..9ef1e33cc7d78 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -496,6 +496,9 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -503,12 +506,12 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ wsize = min_t(unsigned int, wsize, - server->smbd_conn->max_fragmented_send_size - + sp->max_fragmented_send_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else wsize = min_t(unsigned int, - wsize, server->smbd_conn->max_readwrite_size); + wsize, sp->max_read_write_size); } #endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) @@ -544,6 +547,9 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -551,12 +557,12 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ rsize = min_t(unsigned int, rsize, - server->smbd_conn->max_fragmented_recv_size - + sp->max_fragmented_recv_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else rsize = min_t(unsigned int, - rsize, server->smbd_conn->max_readwrite_size); + rsize, sp->max_read_write_size); } #endif diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3aa5927254343..61737726a916c 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -36,6 +36,7 @@ #include "smb2glob.h" #include "cifspdu.h" #include "cifs_spnego.h" +#include "../common/smbdirect/smbdirect.h" #include "smbdirect.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL @@ -4427,10 +4428,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a RDMA write, fill in and append - * smbd_buffer_descriptor_v1 to the end of read request + * smbdirect_buffer_descriptor_v1 to the end of read request */ if (rdata && smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->subreq.io_iter, @@ -4444,8 +4445,8 @@ smb2_new_read_req(void **buf, unsigned int *total_len, req->ReadChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_read_req, Buffer)); req->ReadChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(rdata->mr->mr->iova); v1->token = cpu_to_le32(rdata->mr->mr->rkey); v1->length = 
cpu_to_le32(rdata->mr->mr->length); @@ -4957,10 +4958,10 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a server RDMA read, fill in and append - * smbd_buffer_descriptor_v1 to the end of write request + * smbdirect_buffer_descriptor_v1 to the end of write request */ if (smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, @@ -4979,8 +4980,8 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) req->WriteChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_write_req, Buffer)); req->WriteChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(wdata->mr->mr->iova); v1->token = cpu_to_le32(wdata->mr->mr->rkey); v1->length = cpu_to_le32(wdata->mr->mr->length); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b0b7254661e92..754e94a0e07f5 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -7,6 +7,7 @@ #include #include #include +#include "../common/smbdirect/smbdirect_pdu.h" #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" @@ -50,9 +51,6 @@ struct smb_extract_to_rdma { static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, struct smb_extract_to_rdma *rdma); -/* SMBD version number */ -#define SMBD_V1 0x0100 - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -165,10 +163,11 @@ static void smbd_disconnect_rdma_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, disconnect_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status == SMBD_CONNECTED) { - info->transport_status = SMBD_DISCONNECTING; - rdma_disconnect(info->id); + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + rdma_disconnect(sc->rdma.cm_id); } } @@ -182,6 +181,7 @@ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { struct smbd_connection *info = id->context; + struct smbdirect_socket *sc = &info->socket; log_rdma_event(INFO, "event=%d status=%d\n", event->event, event->status); @@ -205,7 +205,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%d\n", event->event); - info->transport_status = SMBD_CONNECTED; + sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -213,20 +213,20 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: log_rdma_event(INFO, "connecting failed event=%d\n", event->event); - info->transport_status = SMBD_DISCONNECTED; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->conn_wait); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: /* This happens when we fail the negotiation */ - if (info->transport_status == SMBD_NEGOTIATE_FAILED) { - info->transport_status = SMBD_DISCONNECTED; + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up(&info->conn_wait); break; } - info->transport_status = SMBD_DISCONNECTED; + sc->status = 
SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->disconn_wait); wake_up_interruptible(&info->wait_reassembly_queue); wake_up_interruptible_all(&info->wait_send_queue); @@ -275,6 +275,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) int i; struct smbd_request *request = container_of(wc->wr_cqe, struct smbd_request, cqe); + struct smbd_connection *info = request->info; + struct smbdirect_socket *sc = &info->socket; log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); @@ -286,7 +288,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) } for (i = 0; i < request->num_sge; i++) - ib_dma_unmap_single(request->info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -299,7 +301,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) mempool_free(request, request->info->request_mempool); } -static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) +static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) { log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", resp->min_version, resp->max_version, @@ -318,15 +320,17 @@ static bool process_negotiation_response( struct smbd_response *response, int packet_length) { struct smbd_connection *info = response->info; - struct smbd_negotiate_resp *packet = smbd_response_payload(response); + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); - if (packet_length < sizeof(struct smbd_negotiate_resp)) { + if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { log_rdma_event(ERR, "error: packet_length=%d\n", packet_length); return false; } - if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) { + if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { log_rdma_event(ERR, "error: negotiated_version=%x\n", le16_to_cpu(packet->negotiated_version)); return false; @@ -347,20 +351,20 @@ static bool process_negotiation_response( atomic_set(&info->receive_credits, 0); - if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) { + if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { log_rdma_event(ERR, "error: preferred_send_size=%d\n", le32_to_cpu(packet->preferred_send_size)); return false; } - info->max_receive_size = le32_to_cpu(packet->preferred_send_size); + sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { log_rdma_event(ERR, "error: max_receive_size=%d\n", le32_to_cpu(packet->max_receive_size)); return false; } - info->max_send_size = min_t(int, info->max_send_size, - le32_to_cpu(packet->max_receive_size)); + sp->max_send_size = min_t(u32, sp->max_send_size, + le32_to_cpu(packet->max_receive_size)); if (le32_to_cpu(packet->max_fragmented_size) < SMBD_MIN_FRAGMENTED_SIZE) { @@ -368,18 +372,18 @@ static bool process_negotiation_response( le32_to_cpu(packet->max_fragmented_size)); return false; } - info->max_fragmented_send_size = + sp->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); info->rdma_readwrite_threshold = - rdma_readwrite_threshold > info->max_fragmented_send_size ? 
- info->max_fragmented_send_size : + rdma_readwrite_threshold > sp->max_fragmented_send_size ? + sp->max_fragmented_send_size : rdma_readwrite_threshold; - info->max_readwrite_size = min_t(u32, + sp->max_read_write_size = min_t(u32, le32_to_cpu(packet->max_readwrite_size), info->max_frmr_depth * PAGE_SIZE); - info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE; + info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; return true; } @@ -393,8 +397,9 @@ static void smbd_post_send_credits(struct work_struct *work) struct smbd_connection *info = container_of(work, struct smbd_connection, post_send_credits_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { wake_up(&info->wait_receive_queues); return; } @@ -448,7 +453,7 @@ static void smbd_post_send_credits(struct work_struct *work) /* Called from softirq, when recv is done */ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; @@ -474,7 +479,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (response->type) { /* SMBD negotiation response */ case SMBD_NEGOTIATE_RESP: - dump_smbd_negotiate_resp(smbd_response_payload(response)); + dump_smbdirect_negotiate_resp(smbd_response_payload(response)); info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); @@ -531,7 +536,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* Send a KEEP_ALIVE response right away if requested */ info->keep_alive_requested = KEEP_ALIVE_NONE; if (le16_to_cpu(data_transfer->flags) & - SMB_DIRECT_RESPONSE_REQUESTED) { + SMBDIRECT_FLAG_RESPONSE_REQUESTED) { info->keep_alive_requested = KEEP_ALIVE_PENDING; } @@ -635,32 +640,34 @@ static int smbd_ia_open( struct smbd_connection *info, struct sockaddr *dstaddr, int port) { + struct smbdirect_socket *sc = &info->socket; int rc; - info->id = smbd_create_id(info, dstaddr, port); - if (IS_ERR(info->id)) { - rc = PTR_ERR(info->id); + sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); + if (IS_ERR(sc->rdma.cm_id)) { + rc = PTR_ERR(sc->rdma.cm_id); goto out1; } + sc->ib.dev = sc->rdma.cm_id->device; - if (!frwr_is_supported(&info->id->device->attrs)) { + if (!frwr_is_supported(&sc->ib.dev->attrs)) { log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", - info->id->device->attrs.device_cap_flags, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.device_cap_flags, + sc->ib.dev->attrs.max_fast_reg_page_list_len); rc = -EPROTONOSUPPORT; goto out2; } info->max_frmr_depth = min_t(int, smbd_max_frmr_depth, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.max_fast_reg_page_list_len); info->mr_type = IB_MR_TYPE_MEM_REG; - if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) info->mr_type = IB_MR_TYPE_SG_GAPS; - info->pd = ib_alloc_pd(info->id->device, 0); - if (IS_ERR(info->pd)) { - rc = PTR_ERR(info->pd); + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + rc = PTR_ERR(sc->ib.pd); log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); goto out2; } @@ -668,8 
+675,8 @@ static int smbd_ia_open( return 0; out2: - rdma_destroy_id(info->id); - info->id = NULL; + rdma_destroy_id(sc->rdma.cm_id); + sc->rdma.cm_id = NULL; out1: return rc; @@ -683,10 +690,12 @@ static int smbd_ia_open( */ static int smbd_post_send_negotiate_req(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; - struct smbd_negotiate_req *packet; + struct smbdirect_negotiate_req *packet; request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) @@ -695,29 +704,29 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->info = info; packet = smbd_request_payload(request); - packet->min_version = cpu_to_le16(SMBD_V1); - packet->max_version = cpu_to_le16(SMBD_V1); + packet->min_version = cpu_to_le16(SMBDIRECT_V1); + packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; - packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->preferred_send_size = cpu_to_le32(info->max_send_size); - packet->max_receive_size = cpu_to_le32(info->max_receive_size); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + packet->preferred_send_size = cpu_to_le32(sp->max_send_size); + packet->max_receive_size = cpu_to_le32(sp->max_recv_size); packet->max_fragmented_size = - cpu_to_le32(info->max_fragmented_recv_size); + cpu_to_le32(sp->max_fragmented_recv_size); request->num_sge = 1; request->sge[0].addr = ib_dma_map_single( - info->id->device, (void *)packet, + sc->ib.dev, (void *)packet, sizeof(*packet), DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; goto dma_mapping_failed; } request->sge[0].length = sizeof(*packet); - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; ib_dma_sync_single_for_device( - info->id->device, request->sge[0].addr, + sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); request->cqe.done = send_done; @@ -734,14 +743,14 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].length, request->sge[0].lkey); atomic_inc(&info->send_pending); - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (!rc) return 0; /* if we reach here, post send failed */ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); atomic_dec(&info->send_pending); - ib_dma_unmap_single(info->id->device, request->sge[0].addr, + ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); smbd_disconnect_rdma_connection(info); @@ -774,10 +783,10 @@ static int manage_credits_prior_sending(struct smbd_connection *info) /* * Check if we need to send a KEEP_ALIVE message * The idle connection timer triggers a KEEP_ALIVE message when expires - * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send + * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send * back a response. 
* return value: - * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set + * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set * 0: otherwise */ static int manage_keep_alive_before_sending(struct smbd_connection *info) @@ -793,6 +802,8 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc, i; @@ -801,7 +812,7 @@ static int smbd_post_send(struct smbd_connection *info, "rdma_request sge[%d] addr=0x%llx length=%u\n", i, request->sge[i].addr, request->sge[i].length); ib_dma_sync_single_for_device( - info->id->device, + sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -816,7 +827,7 @@ static int smbd_post_send(struct smbd_connection *info, send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); smbd_disconnect_rdma_connection(info); @@ -824,7 +835,7 @@ static int smbd_post_send(struct smbd_connection *info, } else /* Reset timer for idle connection after packet is sent */ mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); return rc; } @@ -833,22 +844,24 @@ static int smbd_post_send_iter(struct smbd_connection *info, struct iov_iter *iter, int *_remaining_data_length) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int i, rc; int header_length; int data_length; struct smbd_request *request; - struct smbd_data_transfer *packet; + struct smbdirect_data_transfer *packet; int new_credits = 0; wait_credit: /* Wait for send credits. 
A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) goto err_wait_credit; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_credit\n"); rc = -EAGAIN; goto err_wait_credit; @@ -860,17 +873,17 @@ static int smbd_post_send_iter(struct smbd_connection *info, wait_send_queue: wait_event(info->wait_post_send, - atomic_read(&info->send_pending) < info->send_credit_target || - info->transport_status != SMBD_CONNECTED); + atomic_read(&info->send_pending) < sp->send_credit_target || + sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); rc = -EAGAIN; goto err_wait_send_queue; } if (unlikely(atomic_inc_return(&info->send_pending) > - info->send_credit_target)) { + sp->send_credit_target)) { atomic_dec(&info->send_pending); goto wait_send_queue; } @@ -890,12 +903,14 @@ static int smbd_post_send_iter(struct smbd_connection *info, .nr_sge = 1, .max_sge = SMBDIRECT_MAX_SEND_SGE, .sge = request->sge, - .device = info->id->device, - .local_dma_lkey = info->pd->local_dma_lkey, + .device = sc->ib.dev, + .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(*_remaining_data_length, + sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, + rc = smb_extract_iter_to_rdma(iter, payload_len, &extract); if (rc < 0) goto err_dma; @@ -909,7 +924,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, /* Fill in the packet header */ packet = smbd_request_payload(request); - packet->credits_requested = cpu_to_le16(info->send_credit_target); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); new_credits = manage_credits_prior_sending(info); atomic_add(new_credits, &info->receive_credits); @@ -919,7 +934,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, packet->flags = 0; if (manage_keep_alive_before_sending(info)) - packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; if (!data_length) @@ -938,23 +953,23 @@ static int smbd_post_send_iter(struct smbd_connection *info, le32_to_cpu(packet->remaining_data_length)); /* Map the packet to DMA */ - header_length = sizeof(struct smbd_data_transfer); + header_length = sizeof(struct smbdirect_data_transfer); /* If this is a packet without payload, don't send padding */ if (!data_length) - header_length = offsetof(struct smbd_data_transfer, padding); + header_length = offsetof(struct smbdirect_data_transfer, padding); - request->sge[0].addr = ib_dma_map_single(info->id->device, + request->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)packet, header_length, DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; request->sge[0].addr = 0; goto err_dma; } request->sge[0].length = header_length; - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; rc = smbd_post_send(info, request); if (!rc) @@ -963,7 +978,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, 
err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) - ib_dma_unmap_single(info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -1000,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_iter(info, NULL, &remaining_data_length); } +static int smbd_post_send_full_iter(struct smbd_connection *info, + struct iov_iter *iter, + int *_remaining_data_length) +{ + int rc = 0; + + /* + * smbd_post_send_iter() respects the + * negotiated max_send_size, so we need to + * loop until the full iter is posted + */ + + while (iov_iter_count(iter) > 0) { + rc = smbd_post_send_iter(info, iter, _remaining_data_length); + if (rc < 0) + break; + } + + return rc; +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1008,17 +1044,19 @@ static int smbd_post_send_empty(struct smbd_connection *info) static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( - info->id->device, response->packet, - info->max_receive_size, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(info->id->device, response->sge.addr)) + sc->ib.dev, response->packet, + sp->max_recv_size, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) return rc; - response->sge.length = info->max_receive_size; - response->sge.lkey = info->pd->local_dma_lkey; + response->sge.length = sp->max_recv_size; + response->sge.lkey = sc->ib.pd->local_dma_lkey; response->cqe.done = recv_done; @@ -1027,9 +1065,9 @@ static int smbd_post_recv( recv_wr.sg_list = &response->sge; recv_wr.num_sge = 1; - rc = ib_post_recv(info->id->qp, &recv_wr, NULL); + rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); if (rc) { - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); smbd_disconnect_rdma_connection(info); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); @@ -1187,9 +1225,10 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info) static void put_receive_buffer( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; unsigned long flags; - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); spin_lock_irqsave(&info->receive_queue_lock, flags); @@ -1264,6 +1303,8 @@ static void idle_connection_timer(struct work_struct *work) struct smbd_connection *info = container_of( work, struct smbd_connection, idle_timer_work.work); + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; if (info->keep_alive_requested != KEEP_ALIVE_NONE) { log_keep_alive(ERR, @@ -1278,7 +1319,7 @@ static void idle_connection_timer(struct work_struct *work) /* Setup the next idle timeout work */ queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); } /* @@ -1289,6 +1330,8 @@ static void idle_connection_timer(struct work_struct *work) void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket 
*sc; + struct smbdirect_socket_parameters *sp; struct smbd_response *response; unsigned long flags; @@ -1296,19 +1339,22 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "rdma session already destroyed\n"); return; } + sc = &info->socket; + sp = &sc->parameters; log_rdma_event(INFO, "destroying rdma session\n"); - if (info->transport_status != SMBD_DISCONNECTED) { - rdma_disconnect(server->smbd_conn->id); + if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { + rdma_disconnect(sc->rdma.cm_id); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( info->disconn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } log_rdma_event(INFO, "destroying qp\n"); - ib_drain_qp(info->id->qp); - rdma_destroy_qp(info->id); + ib_drain_qp(sc->ib.qp); + rdma_destroy_qp(sc->rdma.cm_id); + sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); @@ -1336,7 +1382,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, info->count_receive_queue + info->count_empty_packet_queue - == info->receive_credit_max); + == sp->recv_credit_max); destroy_receive_buffers(info); /* @@ -1355,10 +1401,10 @@ void smbd_destroy(struct TCP_Server_Info *server) } destroy_mr_list(info); - ib_free_cq(info->send_cq); - ib_free_cq(info->recv_cq); - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_free_cq(sc->ib.send_cq); + ib_free_cq(sc->ib.recv_cq); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ mempool_destroy(info->request_mempool); @@ -1367,7 +1413,7 @@ void smbd_destroy(struct TCP_Server_Info *server) mempool_destroy(info->response_mempool); kmem_cache_destroy(info->response_cache); - info->transport_status = SMBD_DESTROYED; + sc->status = SMBDIRECT_SOCKET_DESTROYED; destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); @@ -1392,7 +1438,7 @@ int smbd_reconnect(struct TCP_Server_Info *server) * This is possible if transport is disconnected and we haven't received * notification from RDMA, but upper layer has detected timeout */ - if (server->smbd_conn->transport_status == SMBD_CONNECTED) { + if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(INFO, "disconnecting transport\n"); smbd_destroy(server); } @@ -1424,37 +1470,47 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info) #define MAX_NAME_LEN 80 static int allocate_caches_and_workqueue(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; char name[MAX_NAME_LEN]; int rc; + if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) + return -ENOMEM; + scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); info->request_cache = kmem_cache_create( name, sizeof(struct smbd_request) + - sizeof(struct smbd_data_transfer), + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->request_cache) return -ENOMEM; info->request_mempool = - mempool_create(info->send_credit_target, mempool_alloc_slab, + mempool_create(sp->send_credit_target, mempool_alloc_slab, mempool_free_slab, info->request_cache); if (!info->request_mempool) goto out1; scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + + struct kmem_cache_args response_args = { + .align = __alignof__(struct 
smbd_response), + .useroffset = (offsetof(struct smbd_response, packet) + + sizeof(struct smbdirect_data_transfer)), + .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), + }; info->response_cache = - kmem_cache_create( - name, - sizeof(struct smbd_response) + - info->max_receive_size, - 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create(name, + sizeof(struct smbd_response) + sp->max_recv_size, + &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; info->response_mempool = - mempool_create(info->receive_credit_max, mempool_alloc_slab, + mempool_create(sp->recv_credit_max, mempool_alloc_slab, mempool_free_slab, info->response_cache); if (!info->response_mempool) goto out3; @@ -1464,7 +1520,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!info->workqueue) goto out4; - rc = allocate_receive_buffers(info, info->receive_credit_max); + rc = allocate_receive_buffers(info, sp->recv_credit_max); if (rc) { log_rdma_event(ERR, "failed to allocate receive buffers\n"); goto out5; @@ -1491,6 +1547,8 @@ static struct smbd_connection *_smbd_get_connection( { int rc; struct smbd_connection *info; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; @@ -1500,101 +1558,102 @@ static struct smbd_connection *_smbd_get_connection( info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) return NULL; + sc = &info->socket; + sp = &sc->parameters; - info->transport_status = SMBD_CONNECTING; + sc->status = SMBDIRECT_SOCKET_CONNECTING; rc = smbd_ia_open(info, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); goto create_id_failed; } - if (smbd_send_credit_target > info->id->device->attrs.max_cqe || - smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { + if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || + smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_send_credit_target, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || - smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { + if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || + smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering receive_credit_max = %d. 
Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_receive_credit_max, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - info->receive_credit_max = smbd_receive_credit_max; - info->send_credit_target = smbd_send_credit_target; - info->max_send_size = smbd_max_send_size; - info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; - info->max_receive_size = smbd_max_receive_size; - info->keep_alive_interval = smbd_keep_alive_interval; + sp->recv_credit_max = smbd_receive_credit_max; + sp->send_credit_target = smbd_send_credit_target; + sp->max_send_size = smbd_max_send_size; + sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + sp->max_recv_size = smbd_max_receive_size; + sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || - info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { + if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", IB_DEVICE_NAME_MAX, - info->id->device->name, - info->id->device->attrs.max_send_sge, - info->id->device->attrs.max_recv_sge); + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); goto config_failed; } - info->send_cq = NULL; - info->recv_cq = NULL; - info->send_cq = - ib_alloc_cq_any(info->id->device, info, - info->send_credit_target, IB_POLL_SOFTIRQ); - if (IS_ERR(info->send_cq)) { - info->send_cq = NULL; + sc->ib.send_cq = + ib_alloc_cq_any(sc->ib.dev, info, + sp->send_credit_target, IB_POLL_SOFTIRQ); + if (IS_ERR(sc->ib.send_cq)) { + sc->ib.send_cq = NULL; goto alloc_cq_failed; } - info->recv_cq = - ib_alloc_cq_any(info->id->device, info, - info->receive_credit_max, IB_POLL_SOFTIRQ); - if (IS_ERR(info->recv_cq)) { - info->recv_cq = NULL; + sc->ib.recv_cq = + ib_alloc_cq_any(sc->ib.dev, info, + sp->recv_credit_max, IB_POLL_SOFTIRQ); + if (IS_ERR(sc->ib.recv_cq)) { + sc->ib.recv_cq = NULL; goto alloc_cq_failed; } memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = info; - qp_attr.cap.max_send_wr = info->send_credit_target; - qp_attr.cap.max_recv_wr = info->receive_credit_max; + qp_attr.cap.max_send_wr = sp->send_credit_target; + qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = info->send_cq; - qp_attr.recv_cq = info->recv_cq; + qp_attr.send_cq = sc->ib.send_cq; + qp_attr.recv_cq = sc->ib.recv_cq; qp_attr.port_num = ~0; - rc = rdma_create_qp(info->id, info->pd, &qp_attr); + rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); if (rc) { log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); goto create_qp_failed; } + sc->ib.qp = sc->rdma.cm_id->qp; memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = 0; conn_param.responder_resources = - min(info->id->device->attrs.max_qp_rd_atom, + min(sc->ib.dev->attrs.max_qp_rd_atom, SMBD_CM_RESPONDER_RESOURCES); info->responder_resources = conn_param.responder_resources; log_rdma_mr(INFO, "responder_resources=%d\n", info->responder_resources); /* Need to send IRD/ORD in private data for iWARP */ - 
info->id->device->ops.get_port_immutable( - info->id->device, info->id->port_num, &port_immutable); + sc->ib.dev->ops.get_port_immutable( + sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { ird_ord_hdr[0] = info->responder_resources; ird_ord_hdr[1] = 1; @@ -1615,16 +1674,16 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->conn_wait); init_waitqueue_head(&info->disconn_wait); init_waitqueue_head(&info->wait_reassembly_queue); - rc = rdma_connect(info->id, &conn_param); + rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); goto rdma_connect_failed; } wait_event_interruptible( - info->conn_wait, info->transport_status != SMBD_CONNECTING); + info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); goto rdma_connect_failed; } @@ -1640,7 +1699,7 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); @@ -1675,26 +1734,26 @@ static struct smbd_connection *_smbd_get_connection( negotiation_failed: cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); - info->transport_status = SMBD_NEGOTIATE_FAILED; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; init_waitqueue_head(&info->conn_wait); - rdma_disconnect(info->id); + rdma_disconnect(sc->rdma.cm_id); wait_event(info->conn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: rdma_connect_failed: - rdma_destroy_qp(info->id); + rdma_destroy_qp(sc->rdma.cm_id); create_qp_failed: alloc_cq_failed: - if (info->send_cq) - ib_free_cq(info->send_cq); - if (info->recv_cq) - ib_free_cq(info->recv_cq); + if (sc->ib.send_cq) + ib_free_cq(sc->ib.send_cq); + if (sc->ib.recv_cq) + ib_free_cq(sc->ib.recv_cq); config_failed: - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); create_id_failed: kfree(info); @@ -1719,34 +1778,39 @@ struct smbd_connection *smbd_get_connection( } /* - * Receive data from receive reassembly queue + * Receive data from the transport's receive reassembly queue * All the incoming data packets are placed in reassembly queue - * buf: the buffer to read data into + * iter: the buffer to read data into * size: the length of data to read * return value: actual data read - * Note: this implementation copies the data from reassebmly queue to receive + * + * Note: this implementation copies the data from reassembly queue to receive * buffers used by upper layer. This is not the optimal code path. A better way * to do it is to not have upper layer allocate its receive buffers but rather * borrow the buffer from reassembly queue, and return it after data is * consumed. But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. 
*/ -static int smbd_recv_buf(struct smbd_connection *info, char *buf, - unsigned int size) +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { + struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + again: /* * No need to hold the reassembly queue lock all the time as we are * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, info->reassembly_data_length); if (info->reassembly_data_length >= size) { int queue_length; @@ -1784,7 +1848,10 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf, if (response->first_segment && size == 4) { unsigned int rfc1002_len = data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; data_read = 4; response->first_segment = false; log_read(INFO, "returning rfc1002 length %d\n", @@ -1793,10 +1860,9 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf, } to_copy = min_t(int, data_length - offset, to_read); - memcpy( - buf + data_read, - (char *)data_transfer + data_offset + offset, - to_copy); + if (copy_to_iter((char *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; /* move on to the next buffer? 
*/ if (to_copy == data_length - offset) { @@ -1848,12 +1914,12 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf, rc = wait_event_interruptible( info->wait_reassembly_queue, info->reassembly_data_length >= size || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); /* Don't return any data if interrupted */ if (rc) return rc; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_read(ERR, "disconnected\n"); return -ECONNABORTED; } @@ -1861,89 +1927,6 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf, goto again; } -/* - * Receive a page from receive reassembly queue - * page: the page to read data into - * to_read: the length of data to read - * return value: actual data read - */ -static int smbd_recv_page(struct smbd_connection *info, - struct page *page, unsigned int page_offset, - unsigned int to_read) -{ - int ret; - char *to_address; - void *page_address; - - /* make sure we have the page ready for read */ - ret = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= to_read || - info->transport_status != SMBD_CONNECTED); - if (ret) - return ret; - - /* now we can read from reassembly queue and not sleep */ - page_address = kmap_atomic(page); - to_address = (char *) page_address + page_offset; - - log_read(INFO, "reading from page=%p address=%p to_read=%d\n", - page, to_address, to_read); - - ret = smbd_recv_buf(info, to_address, to_read); - kunmap_atomic(page_address); - - return ret; -} - -/* - * Receive data from transport - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC - * return: total bytes read, or 0. SMB Direct will not do partial read. - */ -int smbd_recv(struct smbd_connection *info, struct msghdr *msg) -{ - char *buf; - struct page *page; - unsigned int to_read, page_offset; - int rc; - - if (iov_iter_rw(&msg->msg_iter) == WRITE) { - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg iter dir %u\n", - iov_iter_rw(&msg->msg_iter)); - rc = -EINVAL; - goto out; - } - - switch (iov_iter_type(&msg->msg_iter)) { - case ITER_KVEC: - buf = msg->msg_iter.kvec->iov_base; - to_read = msg->msg_iter.kvec->iov_len; - rc = smbd_recv_buf(info, buf, to_read); - break; - - case ITER_BVEC: - page = msg->msg_iter.bvec->bv_page; - page_offset = msg->msg_iter.bvec->bv_offset; - to_read = msg->msg_iter.bvec->bv_len; - rc = smbd_recv_page(info, page, page_offset, to_read); - break; - - default: - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg type %d\n", - iov_iter_type(&msg->msg_iter)); - rc = -EINVAL; - } - -out: - /* SMBDirect will read it all or nothing */ - if (rc > 0) - msg->msg_iter.count = 0; - return rc; -} - /* * Send data to transport * Each rqst is transported as a SMBDirect payload @@ -1954,12 +1937,14 @@ int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst_array) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct smb_rqst *rqst; struct iov_iter iter; unsigned int remaining_data_length, klen; int rc, i, rqst_idx; - if (info->transport_status != SMBD_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -EAGAIN; /* @@ -1971,10 +1956,10 @@ int smbd_send(struct TCP_Server_Info *server, for (i = 0; i < num_rqst; i++) remaining_data_length += smb_rqst_len(server, &rqst_array[i]); - if 
(unlikely(remaining_data_length > info->max_fragmented_send_size)) { + if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) { /* assertion: payload never exceeds negotiated maximum */ log_write(ERR, "payload size %d > max size %d\n", - remaining_data_length, info->max_fragmented_send_size); + remaining_data_length, sp->max_fragmented_send_size); return -EINVAL; } @@ -2000,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_iter(info, &rqst->rq_iter, - &remaining_data_length); + rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + &remaining_data_length); if (rc < 0) break; } @@ -2053,6 +2038,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, mr_recovery_work); + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc; @@ -2070,7 +2056,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) } smbdirect_mr->mr = ib_alloc_mr( - info->pd, info->mr_type, + sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2099,12 +2085,13 @@ static void smbd_mr_recovery_work(struct work_struct *work) static void destroy_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *mr, *tmp; cancel_work_sync(&info->mr_recovery_work); list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { if (mr->state == MR_INVALIDATED) - ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); @@ -2121,6 +2108,7 @@ static void destroy_mr_list(struct smbd_connection *info) */ static int allocate_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; int i; struct smbd_mr *smbdirect_mr, *tmp; @@ -2136,7 +2124,7 @@ static int allocate_mr_list(struct smbd_connection *info) smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); if (!smbdirect_mr) goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, + smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2181,20 +2169,20 @@ static int allocate_mr_list(struct smbd_connection *info) */ static struct smbd_mr *get_mr(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *ret; int rc; again: rc = wait_event_interruptible(info->wait_mr, atomic_read(&info->mr_ready_count) || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) { log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); return NULL; } - if (info->transport_status != SMBD_CONNECTED) { - log_rdma_mr(ERR, "info->transport_status=%x\n", - info->transport_status); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_rdma_mr(ERR, "sc->status=%x\n", sc->status); return NULL; } @@ -2247,6 +2235,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool 
need_invalidate) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc, num_pages; enum dma_data_direction dir; @@ -2276,7 +2265,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, num_pages, iov_iter_count(iter), info->max_frmr_depth); smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); - rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, + rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", @@ -2312,7 +2301,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution * on the next ib_post_send when we actually send I/O to remote peer */ - rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL); + rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); if (!rc) return smbdirect_mr; @@ -2321,7 +2310,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); dma_map_error: @@ -2359,6 +2348,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) { struct ib_send_wr *wr; struct smbd_connection *info = smbdirect_mr->conn; + struct smbdirect_socket *sc = &info->socket; int rc = 0; if (smbdirect_mr->need_invalidate) { @@ -2372,7 +2362,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) wr->send_flags = IB_SEND_SIGNALED; init_completion(&smbdirect_mr->invalidate_done); - rc = ib_post_send(info->id->qp, wr, NULL); + rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(info); @@ -2389,7 +2379,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) if (smbdirect_mr->state == MR_INVALIDATED) { ib_dma_unmap_sg( - info->id->device, smbdirect_mr->sgt.sgl, + sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); smbdirect_mr->state = MR_READY; @@ -2552,13 +2542,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, size_t fsize = folioq_folio_size(folioq, slot); if (offset < fsize) { - size_t part = umin(maxsize - ret, fsize - offset); + size_t part = umin(maxsize, fsize - offset); if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) return -EIO; offset += part; ret += part; + maxsize -= part; } if (offset >= fsize) { @@ -2573,7 +2564,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, slot = 0; } } - } while (rdma->nr_sge < rdma->max_sge || maxsize > 0); + } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); iter->folioq = folioq; iter->folioq_slot = slot; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index c08e3665150d7..75b3f491c3ad6 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -15,6 +15,9 @@ #include #include +#include "../common/smbdirect/smbdirect.h" +#include "../common/smbdirect/smbdirect_socket.h" + extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth; extern int smbd_keep_alive_interval; @@ -50,14 +53,8 @@ enum smbd_connection_status { * 5. 
mempools for allocating packets */ struct smbd_connection { - enum smbd_connection_status transport_status; - - /* RDMA related */ - struct rdma_cm_id *id; - struct ib_qp_init_attr qp_attr; - struct ib_pd *pd; - struct ib_cq *send_cq, *recv_cq; - struct ib_device_attr dev_attr; + struct smbdirect_socket socket; + int ri_rc; struct completion ri_done; wait_queue_head_t conn_wait; @@ -72,15 +69,7 @@ struct smbd_connection { spinlock_t lock_new_credits_offered; int new_credits_offered; - /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */ - int receive_credit_max; - int send_credit_target; - int max_send_size; - int max_fragmented_recv_size; - int max_fragmented_send_size; - int max_receive_size; - int keep_alive_interval; - int max_readwrite_size; + /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */ enum keep_alive_status keep_alive_requested; int protocol; atomic_t send_credits; @@ -177,54 +166,6 @@ enum smbd_message_type { SMBD_TRANSFER_DATA, }; -#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001 - -/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ -struct smbd_negotiate_req { - __le16 min_version; - __le16 max_version; - __le16 reserved; - __le16 credits_requested; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ -struct smbd_negotiate_resp { - __le16 min_version; - __le16 max_version; - __le16 negotiated_version; - __le16 reserved; - __le16 credits_requested; - __le16 credits_granted; - __le32 status; - __le32 max_readwrite_size; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ -struct smbd_data_transfer { - __le16 credits_requested; - __le16 credits_granted; - __le16 flags; - __le16 reserved; - __le32 remaining_data_length; - __le32 data_offset; - __le32 data_length; - __le32 padding; - __u8 buffer[]; -} __packed; - -/* The packet fields for a registered RDMA buffer */ -struct smbd_buffer_descriptor_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - /* Maximum number of SGEs used by smbdirect.c in any send work request */ #define SMBDIRECT_MAX_SEND_SGE 6 diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h new file mode 100644 index 0000000000000..b9a385344ff31 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. 
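+ * + * For example, max_fragmented_send_size is the negotiated bound that + * smbd_send() checks a request's total payload against before posting + * any work; larger payloads are rejected with -EINVAL.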
+ */ +struct smbdirect_socket_parameters { + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_pdu.h b/fs/smb/common/smbdirect/smbdirect_pdu.h new file mode 100644 index 0000000000000..ae9fdb05ce231 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_pdu.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2017 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ + +#define SMBDIRECT_V1 0x0100 + +/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ +struct smbdirect_negotiate_req { + __le16 min_version; + __le16 max_version; + __le16 reserved; + __le16 credits_requested; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ +struct smbdirect_negotiate_resp { + __le16 min_version; + __le16 max_version; + __le16 negotiated_version; + __le16 reserved; + __le16 credits_requested; + __le16 credits_granted; + __le32 status; + __le32 max_readwrite_size; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +#define SMBDIRECT_DATA_MIN_HDR_SIZE 0x14 +#define SMBDIRECT_DATA_OFFSET 0x18 + +#define SMBDIRECT_FLAG_RESPONSE_REQUESTED 0x0001 + +/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ +struct smbdirect_data_transfer { + __le16 credits_requested; + __le16 credits_granted; + __le16 flags; + __le16 reserved; + __le32 remaining_data_length; + __le32 data_offset; + __le32 data_length; + __le32 padding; + __u8 buffer[]; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h new file mode 100644 index 0000000000000..e5b15cc44a7ba --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ + +enum smbdirect_socket_status { + SMBDIRECT_SOCKET_CREATED, + SMBDIRECT_SOCKET_CONNECTING, + SMBDIRECT_SOCKET_CONNECTED, + SMBDIRECT_SOCKET_NEGOTIATE_FAILED, + SMBDIRECT_SOCKET_DISCONNECTING, + SMBDIRECT_SOCKET_DISCONNECTED, + SMBDIRECT_SOCKET_DESTROYED +}; + +struct smbdirect_socket { + enum smbdirect_socket_status status; + + /* RDMA related */ + struct { + struct rdma_cm_id *cm_id; + } rdma; + + /* IB verbs related */ + struct { + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + + /* + * shortcuts for rdma.cm_id->{qp,device}; + */ + struct ib_qp *qp; + struct ib_device *dev; + } ib; + + struct smbdirect_socket_parameters parameters; +}; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3559446279c15..4d01ec8c0a58a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3405,6 +3405,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); 
extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46f144c3aa398..fc78f7ca0aa56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4035,7 +4035,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } -bool dev_nit_active(struct net_device *dev); +bool dev_nit_active_rcu(const struct net_device *dev); +static inline bool dev_nit_active(const struct net_device *dev) +{ + bool ret; + + rcu_read_lock(); + ret = dev_nit_active_rcu(dev); + rcu_read_unlock(); + return ret; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); static inline void __dev_put(struct net_device *dev) diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 30e9570beb2af..fda94b2647ffa 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -23,7 +23,6 @@ struct net_hotdata { struct net_offload udpv6_offload; #endif struct list_head offload_base; - struct list_head ptype_all; struct kmem_cache *skbuff_cache; struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fffc4b5b50b17..adc476e8be446 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -189,6 +189,9 @@ struct net { #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif + + RH_KABI_EXTEND(struct list_head ptype_all) + RH_KABI_EXTEND(struct list_head ptype_specific) } __randomize_layout; #include diff --git a/include/net/tcp.h b/include/net/tcp.h index d1948d357dade..5920663519d7a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,6 +41,7 @@ #include #include #include +#include RH_KABI_HIDE_INCLUDE() #include #include @@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); void tcp_sack_compress_send_ack(struct sock *sk); +static inline void tcp_cleanup_skb(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); +} + +static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb)); + __skb_queue_tail(&sk->sk_receive_queue, skb); +} + /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) diff --git a/io_uring/futex.c b/io_uring/futex.c index 914848f46beb2..90ff427f0294e 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -337,6 +337,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) goto done_unlock; } + req->flags |= REQ_F_ASYNC_DATA; req->async_data = ifd; ifd->q = futex_q_init; ifd->q.bitset = iof->futex_mask; @@ -359,6 +360,8 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); + req->async_data = NULL; + req->flags &= ~REQ_F_ASYNC_DATA; kfree(ifd); return IOU_OK; } diff --git a/mm/secretmem.c b/mm/secretmem.c index 399552814fd0f..4662f2510ae5f 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,19 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - const struct qstr 
qname = QSTR_INIT(anon_name, strlen(anon_name)); - int err; - inode = alloc_anon_inode(secretmem_mnt->mnt_sb); + inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &qname, NULL); - if (err) { - file = ERR_PTR(err); - goto err_free_inode; - } - file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) diff --git a/net/core/dev.c b/net/core/dev.c index 556b8e9eadab3..0c64e6dfc02d2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -570,10 +570,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { - if (pt->type == htons(ETH_P_ALL)) - return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all; - else - return pt->dev ? &pt->dev->ptype_specific : + if (pt->type == htons(ETH_P_ALL)) { + if (!pt->af_packet_net && !pt->dev) + return NULL; + + return pt->dev ? &pt->dev->ptype_all : + &pt->af_packet_net->ptype_all; + } + + if (pt->dev) + return &pt->dev->ptype_specific; + + return pt->af_packet_net ? &pt->af_packet_net->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } @@ -594,6 +602,9 @@ void dev_add_pack(struct packet_type *pt) { struct list_head *head = ptype_head(pt); + if (WARN_ON_ONCE(!head)) + return; + spin_lock(&ptype_lock); list_add_rcu(&pt->list, head); spin_unlock(&ptype_lock); @@ -618,6 +629,9 @@ void __dev_remove_pack(struct packet_type *pt) struct list_head *head = ptype_head(pt); struct packet_type *pt1; + if (!head) + return; + spin_lock(&ptype_lock); list_for_each_entry(pt1, head, list) { @@ -2271,16 +2285,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) } /** - * dev_nit_active - return true if any network interface taps are in use + * dev_nit_active_rcu - return true if any network interface taps are in use + * + * The caller must hold the RCU lock * * @dev: network device to check for the presence of taps */ -bool dev_nit_active(struct net_device *dev) +bool dev_nit_active_rcu(const struct net_device *dev) { - return !list_empty(&net_hotdata.ptype_all) || + /* Callers may hold either RCU or RCU BH lock */ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + + return !list_empty(&dev_net(dev)->ptype_all) || !list_empty(&dev->ptype_all); } -EXPORT_SYMBOL_GPL(dev_nit_active); +EXPORT_SYMBOL_GPL(dev_nit_active_rcu); /* * Support routine. Sends outgoing frames to any network @@ -2289,11 +2308,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { - struct list_head *ptype_list = &net_hotdata.ptype_all; struct packet_type *ptype, *pt_prev = NULL; + struct list_head *ptype_list; struct sk_buff *skb2 = NULL; rcu_read_lock(); + ptype_list = &dev_net_rcu(dev)->ptype_all; again: list_for_each_entry_rcu(ptype, ptype_list, list) { if (READ_ONCE(ptype->ignore_outgoing)) @@ -2337,7 +2357,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) pt_prev = ptype; } - if (ptype_list == &net_hotdata.ptype_all) { + if (ptype_list != &dev->ptype_all) { ptype_list = &dev->ptype_all; goto again; } @@ -3540,6 +3560,18 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, features &= ~NETIF_F_TSO_MANGLEID; } + /* NETIF_F_IPV6_CSUM does not support IPv6 extension headers, + * so neither does TSO that depends on it. 
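+ * Extension headers show up below as a network header longer than a + * bare struct ipv6hdr; the hop-by-hop jumbogram option is the only + * exception that is still allowed.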
+ */ + if (features & NETIF_F_IPV6_CSUM && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr) && + !ipv6_has_hopopt_jumbo(skb)) + features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); + return features; } @@ -3580,7 +3612,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (dev_nit_active(dev)) + if (dev_nit_active_rcu(dev)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -5514,7 +5546,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, if (pfmemalloc) goto skip_taps; - list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) { + list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all, + list) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -5626,6 +5659,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, &ptype_base[ntohs(type) & PTYPE_HASH_MASK]); + + /* orig_dev and skb->dev could belong to different netns; + * Even in such case we need to traverse only the list + * coming from skb->dev, as the ptype owner (packet socket) + * will use dev_net(skb->dev) to do namespace filtering. + */ + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &dev_net_rcu(skb->dev)->ptype_specific); } deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, diff --git a/net/core/hotdata.c b/net/core/hotdata.c index d0aaaaa556f22..0bc893d5f07b0 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -7,7 +7,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), - .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all), .gro_normal_batch = 8, .netdev_budget = 300, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index fa6d3969734a6..3e92bf0f9060b 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -185,7 +185,13 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos) } } - list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { + list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) { if (i == pos) return pt; ++i; @@ -210,6 +216,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); struct net_device *dev; struct packet_type *pt; struct list_head *nxt; @@ -232,15 +239,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto found; } } - - nxt = net_hotdata.ptype_all.next; - goto ptype_all; + nxt = net->ptype_all.next; + goto net_ptype_all; } - if (pt->type == htons(ETH_P_ALL)) { -ptype_all: - if (nxt != &net_hotdata.ptype_all) + if (pt->af_packet_net) { +net_ptype_all: + if (nxt != &net->ptype_all && nxt != &net->ptype_specific) goto found; + + if (nxt == &net->ptype_all) { + /* continue with ->ptype_specific if it's not empty */ + nxt = net->ptype_specific.next; + if (nxt != &net->ptype_specific) + goto found; + } + hash = 0; nxt = ptype_base[0].next; } else diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 70fea7c1a4b0a..19f33e26a8e89 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ 
-334,6 +334,9 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); + + INIT_LIST_HEAD(&net->ptype_all); + INIT_LIST_HEAD(&net->ptype_specific); preinit_net_sysctl(net); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4f77bd862e957..18f3a5b0f42af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1145,7 +1145,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0f523cbfe329e..32b28fc21b63c 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) if (!skb) return; - skb_dst_drop(skb); + tcp_cleanup_skb(skb); /* segs_in has been initialized to 1 in tcp_create_openreq_child(). * Hence, reset segs_in to 0 before calling tcp_segs_in() * to avoid double counting. Also, tcp_segs_in() expects @@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4ffebfb503269..0ee22e10fcfae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4970,7 +4970,7 @@ static void tcp_ofo_queue(struct sock *sk) tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); else kfree_skb_partial(skb, fragstolen); @@ -5162,7 +5162,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, skb, fragstolen)) ? 
1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); skb_set_owner_r(skb, sk); } return eaten; @@ -5245,7 +5245,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); return; } - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -6214,7 +6214,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5afe5e57c89b5..49560a33f3860 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2024,7 +2024,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, */ skb_condense(skb); - skb_dst_drop(skb); + tcp_cleanup_skb(skb); if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6551648512585..ec1c37cbe04cf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -804,8 +804,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } else { im->mca_crcount = idev->mc_qrv; } - in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); + in6_dev_put(pmc->idev); kfree_rcu(pmc, rcu); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 59e2c46240f5c..8e1688239cb30 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; + int ret; + + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ int ret; + struct socket *sock = svsk->sk_sock; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1019,7 +1046,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, 
want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bbf26cc4f6ee2..4037c45936c3f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1749,6 +1749,9 @@ int decrypt_skb(struct sock *sk, struct scatterlist *sgout) return tls_decrypt_sg(sk, NULL, sgout, &darg); } +/* All records returned from a recvmsg() call must have the same type. + * 0 is not a valid content type. Use it as "no type reported, yet". + */ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { @@ -1992,8 +1995,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err < 0) goto end; + /* process_rx_list() will set @control if it processed any records */ copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) + if (len <= copied || rx_more || + (control && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 98f1e2b67c76b..b95d882f9dbcb 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -874,7 +874,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } - if (p.collect_md) { + if (p.collect_md || xi->p.collect_md) { NL_SET_ERR_MSG(extack, "collect_md can't be changed"); return -EINVAL; } @@ -885,11 +885,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], } else { if (xi->dev != dev) return -EEXIST; - if (xi->p.collect_md) { - NL_SET_ERR_MSG(extack, - "device can't be changed to collect_md"); - return -EINVAL; - } } return xfrmi_update(xi, &p); diff --git a/redhat/kernel.changelog-10.0 b/redhat/kernel.changelog-10.0 index 26b686eb18f02..3e461636865b5 100644 --- a/redhat/kernel.changelog-10.0 +++ b/redhat/kernel.changelog-10.0 @@ -1,3 +1,56 @@ +* Wed Sep 24 2025 CKI KWF Bot [6.12.0-55.37.1.el10_0] +- selftests: tls: add tests for zero-length records (Sabrina Dubroca) [RHEL-114331] {CVE-2025-39682} +- tls: fix handling of zero-length records on the rx_list (Sabrina Dubroca) [RHEL-114331] {CVE-2025-39682} +- fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass (Audra Mitchell) [RHEL-110313] {CVE-2025-38396} +Resolves: RHEL-110313, RHEL-114331 + +* Mon Sep 22 2025 CKI KWF Bot [6.12.0-55.36.1.el10_0] +- io_uring/futex: ensure io_futex_wait() cleans up properly on failure (CKI Backport Bot) [RHEL-114341] {CVE-2025-39698} +- ice: use fixed adapter index for E825C embedded devices (CKI Backport Bot) [RHEL-111792] +- ice: use DSN instead of PCI BDF for ice_adapter index (CKI Backport Bot) [RHEL-111792] +- tcp: drop secpath at the same time as we currently drop dst (Sabrina Dubroca) [RHEL-82133] +Resolves: RHEL-111792, RHEL-114341, RHEL-82133 + +* Fri Sep 19 2025 CKI KWF Bot [6.12.0-55.35.1.el10_0] +- cifs: Fix reading into an ITER_FOLIOQ from the smbdirect code (Paulo Alcantara) [RHEL-111177] +- cifs: Fix the smbd_response slab to allow usercopy (Paulo Alcantara) [RHEL-111177] {CVE-2025-38523} +- smb: client: let smbd_post_send_iter() respect the peers max_send_size and transmit all data (Paulo Alcantara) [RHEL-111177] +- smb: client: fix max_sge overflow in smb_extract_folioq_to_rdma() (Paulo Alcantara) [RHEL-111177] +- smb: client: make use of common smbdirect_socket_parameters (Paulo Alcantara) [RHEL-111177] +- smb: smbdirect: introduce smbdirect_socket_parameters (Paulo Alcantara) 
[RHEL-111177] +- smb: client: make use of common smbdirect_socket (Paulo Alcantara) [RHEL-111177] +- smb: smbdirect: add smbdirect_socket.h (Paulo Alcantara) [RHEL-111177] +- smb: client: make use of common smbdirect.h (Paulo Alcantara) [RHEL-111177] +- smb: smbdirect: add smbdirect.h with public structures (Paulo Alcantara) [RHEL-111177] +- smb: client: make use of common smbdirect_pdu.h (Paulo Alcantara) [RHEL-111177] +- smb: smbdirect: add smbdirect_pdu.h with protocol definitions (Paulo Alcantara) [RHEL-111177] +- s390/sclp: Fix SCCB present check (CKI Backport Bot) [RHEL-113561] {CVE-2025-39694} +- net: stmmac: fix TSO DMA API usage causing oops (Izabela Bakollari) [RHEL-105352] +- smb: client: fix use-after-free in cifs_oplock_break (CKI Backport Bot) [RHEL-111198] {CVE-2025-38527} +Resolves: RHEL-105352, RHEL-111177, RHEL-111198, RHEL-113561 + +* Mon Sep 15 2025 CKI KWF Bot [6.12.0-55.34.1.el10_0] +- sunrpc: fix handling of server side tls alerts (Olga Kornievskaia) [RHEL-111073] {CVE-2025-38566} +- i40e: When removing VF MAC filters, only check PF-set MAC (CKI Backport Bot) [RHEL-109771] +- usb: dwc3: gadget: check that event count does not exceed event buffer length (CKI Backport Bot) [RHEL-107659] {CVE-2025-37810} +Resolves: RHEL-107659, RHEL-109771, RHEL-111073 + +* Tue Sep 09 2025 Jan Stancek [6.12.0-55.33.1.el10_0] +- xfrm: interface: fix use-after-free after changing collect_md xfrm interface (CKI Backport Bot) [RHEL-109530] {CVE-2025-38500} +- idpf: convert control queue mutex to a spinlock (CKI Backport Bot) [RHEL-106061] {CVE-2025-38392} +- eth: bnxt: fix missing ring index trim on error path (CKI Backport Bot) [RHEL-104564] {CVE-2025-37873} +- tcp: Correct signedness in skb remaining space calculation (CKI Backport Bot) [RHEL-107844] {CVE-2025-38463} +- ipv6: mcast: Delay put pmc->idev in mld_del_delrec() (CKI Backport Bot) [RHEL-111154] {CVE-2025-38550} +- redhat: selftests/bpf: Add cpuv4 variant (Viktor Malik) [RHEL-109928] +- i40e: report VF tx_dropped with tx_errors instead of tx_discards (Dennis Chen) [RHEL-105138] {CVE-2025-38200} +- use uniform permission checks for all mount propagation changes (Ian Kent) [RHEL-107306] {CVE-2025-38498} +- do_change_type(): refuse to operate on unmounted/not ours mounts (Ian Kent) [RHEL-107306] {CVE-2025-38498} +- ublk: make sure ubq->canceling is set when queue is frozen (Ming Lei) [RHEL-99437] {CVE-2025-22068} +- net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM JIRA: https://issues.redhat.com/browse/RHEL-109821 Y-JIRA: https://issues.redhat.com/browse/RHEL-79173 (Jakub Ramaseuski) +- scsi: lpfc: Use memcpy() for BIOS version (Ewan D. Milne) [RHEL-105936] {CVE-2025-38332} +- net: introduce per netns packet chains (Paolo Abeni) [RHEL-88923] +Resolves: RHEL-104564, RHEL-105138, RHEL-105936, RHEL-106061, RHEL-107306, RHEL-107844, RHEL-109530, RHEL-109928, RHEL-111154, RHEL-88923, RHEL-99437 + * Tue Sep 09 2025 Jan Stancek [6.12.0-55.32.1.el10_0] - posix-cpu-timers: fix race between handle_posix_cpu_timers() and posix_cpu_timer_del() (CKI Backport Bot) [RHEL-112784] {CVE-2025-38352} Resolves: RHEL-112784 diff --git a/redhat/kernel.spec.template b/redhat/kernel.spec.template index 73ce3ce469286..cf4b8adaed365 100644 --- a/redhat/kernel.spec.template +++ b/redhat/kernel.spec.template @@ -3262,7 +3262,7 @@ export CFLAGS="%{build_cflags}" # 'make install' for bpf is broken and upstream refuses to fix it. # Install the needed files manually. 
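# The cpuv4 directory added below is assumed to hold the BPF selftests # variant built for the v4 BPF instruction set; it is handled the same # way as the existing no_alu32 variant.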
%{log_msg "install selftests"} -for dir in bpf bpf/no_alu32 bpf/progs; do +for dir in bpf bpf/no_alu32 bpf/cpuv4 bpf/progs; do # In ARK, the rpm build continues even if some of the selftests # cannot be built. It's not always possible to build selftests, # as upstream sometimes dependens on too new llvm version or has @@ -3278,14 +3278,17 @@ done %buildroot_save_unstripped "usr/libexec/kselftests/bpf/test_progs" %buildroot_save_unstripped "usr/libexec/kselftests/bpf/test_progs-no_alu32" +%buildroot_save_unstripped "usr/libexec/kselftests/bpf/test_progs-cpuv4" # The urandom_read binary doesn't pass the check-rpaths check and upstream # refuses to fix it. So, we save it to buildroot_unstripped and delete it so it # will be hidden from check-rpaths and will automatically get restored later. %buildroot_save_unstripped "usr/libexec/kselftests/bpf/urandom_read" %buildroot_save_unstripped "usr/libexec/kselftests/bpf/no_alu32/urandom_read" +%buildroot_save_unstripped "usr/libexec/kselftests/bpf/cpuv4/urandom_read" rm -f %{buildroot}/usr/libexec/kselftests/bpf/urandom_read rm -f %{buildroot}/usr/libexec/kselftests/bpf/no_alu32/urandom_read +rm -f %{buildroot}/usr/libexec/kselftests/bpf/cpuv4/urandom_read popd %{log_msg "end build selftests"} diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 1a706d03bb6bb..c23f5d643ef08 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -179,13 +179,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -204,7 +203,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -1668,6 +1680,283 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 
0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd;