Skip to content

Commit

Permalink
net: qualcomm: rmnet: add tx packets aggregation
Browse files Browse the repository at this point in the history
Add tx packets aggregation.

Bidirectional TCP throughput tests through iperf with low-cat
Thread-x based modems revealed performance issues both in tx
and rx.

The Windows driver does not show this issue: inspecting USB
packets revealed that the only notable change is the driver
enabling tx packets aggregation.

Tx packets aggregation is by default disabled and can be enabled
by increasing the value of ETHTOOL_A_COALESCE_TX_MAX_AGGR_FRAMES.

The maximum aggregated size is by default set to a reasonably low
value in order to support the majority of modems.

This implementation is based on patches available in Code Aurora
repositories (msm kernel) whose main authors are

Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Sean Tranchetti <stranche@codeaurora.org>

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
  • Loading branch information
dnlplm authored and intel-lab-lkp committed Nov 30, 2022
1 parent a8c8211 commit b97e1bd
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 4 deletions.
5 changes: 5 additions & 0 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
Expand Up @@ -12,6 +12,7 @@
#include "rmnet_handlers.h"
#include "rmnet_vnd.h"
#include "rmnet_private.h"
#include "rmnet_map.h"

/* Local Definitions and Declarations */

Expand Down Expand Up @@ -39,6 +40,8 @@ static int rmnet_unregister_real_device(struct net_device *real_dev)
if (port->nr_rmnet_devs)
return -EINVAL;

rmnet_map_tx_aggregate_exit(port);

netdev_rx_handler_unregister(real_dev);

kfree(port);
Expand Down Expand Up @@ -79,6 +82,8 @@ static int rmnet_register_real_device(struct net_device *real_dev,
for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
INIT_HLIST_HEAD(&port->muxed_ep[entry]);

rmnet_map_tx_aggregate_init(port);

netdev_dbg(real_dev, "registered with rmnet\n");
return 0;
}
Expand Down
20 changes: 20 additions & 0 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
Expand Up @@ -6,6 +6,7 @@
*/

#include <linux/skbuff.h>
#include <linux/time.h>
#include <net/gro_cells.h>

#ifndef _RMNET_CONFIG_H_
Expand All @@ -19,6 +20,12 @@ struct rmnet_endpoint {
struct hlist_node hlnode;
};

/* Limits that drive tx (egress) packet aggregation on one rmnet port. */
struct rmnet_egress_agg_params {
/* Maximum size, in bytes, of one aggregated frame */
u32 bytes;
/* Maximum number of packets per aggregate; the egress handler only
 * aggregates when this is greater than 1
 */
u32 count;
/* Flush timeout of a pending aggregate, in nanoseconds */
u64 time_nsec;
};

/* One instance of this structure is instantiated for each real_dev associated
* with rmnet.
*/
Expand All @@ -30,6 +37,19 @@ struct rmnet_port {
struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
struct net_device *bridge_ep;
struct net_device *rmnet_dev;

/* Egress aggregation information */
struct rmnet_egress_agg_params egress_agg_params;
/* Protect aggregation related elements */
spinlock_t agg_lock;
struct sk_buff *skbagg_head;
struct sk_buff *skbagg_tail;
int agg_state;
u8 agg_count;
struct timespec64 agg_time;
struct timespec64 agg_last;
struct hrtimer hrtimer;
struct work_struct agg_wq;
};

extern struct rtnl_link_ops rmnet_link_ops;
Expand Down
18 changes: 16 additions & 2 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
Expand Up @@ -164,8 +164,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,

map_header->mux_id = mux_id;

skb->protocol = htons(ETH_P_MAP);
if (port->egress_agg_params.count > 1) {
unsigned int len;

len = rmnet_map_tx_aggregate(skb, port, orig_dev);
if (likely(len)) {
rmnet_vnd_tx_fixup_len(len, orig_dev);
return -EINPROGRESS;
}
return -ENOMEM;
}

skb->protocol = htons(ETH_P_MAP);
return 0;
}

Expand Down Expand Up @@ -235,6 +245,7 @@ void rmnet_egress_handler(struct sk_buff *skb)
struct rmnet_port *port;
struct rmnet_priv *priv;
u8 mux_id;
int err;

sk_pacing_shift_update(skb->sk, 8);

Expand All @@ -247,8 +258,11 @@ void rmnet_egress_handler(struct sk_buff *skb)
if (!port)
goto drop;

if (rmnet_map_egress_handler(skb, port, mux_id, orig_dev))
err = rmnet_map_egress_handler(skb, port, mux_id, orig_dev);
if (err == -ENOMEM)
goto drop;
else if (err == -EINPROGRESS)
return;

rmnet_vnd_tx_fixup(skb, orig_dev);

Expand Down
6 changes: 6 additions & 0 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
Expand Up @@ -53,5 +53,11 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
struct net_device *orig_dev,
int csum_type);
int rmnet_map_process_next_hdr_packet(struct sk_buff *skb, u16 len);
/* Aggregate skb on port or send it on its own. Returns the length skb had on
 * entry when the packet was consumed, 0 when it is larger than the configured
 * aggregate size and was left to the caller.
 */
unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
struct net_device *orig_dev);
/* Set up the aggregation lock, flush timer and flush work of port and store
 * the default limits (count 1, i.e. aggregation not used by the egress handler).
 */
void rmnet_map_tx_aggregate_init(struct rmnet_port *port);
/* Stop the flush timer and flush work of port and drop an aggregate that is
 * still waiting to be flushed.
 */
void rmnet_map_tx_aggregate_exit(struct rmnet_port *port);
/* Update the aggregation limits of port: size in bytes, count in packets,
 * time in microseconds.
 */
void rmnet_map_update_ul_agg_config(struct rmnet_port *port, u32 size,
u32 count, u32 time);

#endif /* _RMNET_MAP_H_ */
191 changes: 191 additions & 0 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
Expand Up @@ -12,6 +12,7 @@
#include "rmnet_config.h"
#include "rmnet_map.h"
#include "rmnet_private.h"
#include "rmnet_vnd.h"

#define RMNET_MAP_DEAGGR_SPACING 64
#define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
Expand Down Expand Up @@ -518,3 +519,193 @@ int rmnet_map_process_next_hdr_packet(struct sk_buff *skb,

return 0;
}

/* Gap since the previous tx packet, in nanoseconds (10 ms), above which a
 * packet that would start a new aggregate is sent on its own instead.
 */
#define RMNET_AGG_BYPASS_TIME_NSEC 10000000L

/* Forget the aggregate currently tracked by @port.
 *
 * Only the bookkeeping is cleared: the skb that skbagg_head pointed to is
 * neither freed nor sent here, so the caller must have taken it over first.
 * Every caller in this file invokes this with agg_lock held.
 */
static void reset_aggr_params(struct rmnet_port *port)
{
	memset(&port->agg_time, 0, sizeof(port->agg_time));
	port->agg_state = 0;
	port->agg_count = 0;
	port->skbagg_head = NULL;
}

/* Hand an aggregated skb to the real device of @port.
 *
 * The skb is linearized first when the real device features require it. If
 * linearization fails the skb is freed and accounted as a tx drop on
 * port->rmnet_dev; otherwise it is queued with dev_queue_xmit().
 */
static void rmnet_send_skb(struct rmnet_port *port, struct sk_buff *skb)
{
	struct rmnet_priv *priv;

	/* Common case: no linearization needed, or it succeeded. */
	if (!skb_needs_linearize(skb, port->dev->features) ||
	    likely(!__skb_linearize(skb))) {
		dev_queue_xmit(skb);
		return;
	}

	priv = netdev_priv(port->rmnet_dev);
	this_cpu_inc(priv->pcpu_stats->stats.tx_drops);
	dev_kfree_skb_any(skb);
}

/* Work item run after the aggregation timer fired: detach the pending
 * aggregate from the port under agg_lock and transmit it outside the lock.
 */
static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
{
	struct rmnet_port *port = container_of(work, struct rmnet_port, agg_wq);
	struct sk_buff *pending = NULL;

	spin_lock_bh(&port->agg_lock);
	if (likely(port->agg_state == -EINPROGRESS)) {
		/* The tx path may have shipped the buffer already, in which
		 * case there is nothing to take over.
		 */
		pending = port->skbagg_head;
		if (likely(pending))
			reset_aggr_params(port);
		port->agg_state = 0;
	}
	spin_unlock_bh(&port->agg_lock);

	if (pending)
		rmnet_send_skb(port, pending);
}

/* hrtimer callback for the aggregation timeout.
 *
 * The flush itself is deferred to the agg_wq work item; the timer is one-shot
 * and is re-armed by the tx path when a new aggregate is started.
 *
 * The function is only referenced from this file (it is installed as
 * port->hrtimer.function) and has no prototype in a header, so it is static;
 * leaving it with external linkage triggers a missing-prototype warning.
 */
static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
{
	struct rmnet_port *port = container_of(t, struct rmnet_port, hrtimer);

	schedule_work(&port->agg_wq);

	return HRTIMER_NORESTART;
}

/* Aggregate an uplink MAP packet on @port, or send it on its own.
 *
 * @skb: MAP packet to transmit
 * @port: port holding the aggregation state and limits
 * @orig_dev: not used by this function
 *
 * A packet starts a new aggregate unless traffic is sparse (more than
 * RMNET_AGG_BYPASS_TIME_NSEC since the previous packet), it exactly fills the
 * byte limit, or the aggregation buffer cannot be allocated; in those cases
 * it is sent directly. Later packets are chained on the frag_list of the
 * aggregate head until the packet count, byte or time limit is reached, or
 * until the flush timer fires.
 *
 * Return: the length @skb had on entry when the packet was consumed (queued
 * in the aggregate or handed to dev_queue_xmit()), or 0 when @skb is larger
 * than egress_agg_params.bytes, in which case @skb is left untouched for the
 * caller to dispose of.
 */
unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
				    struct net_device *orig_dev)
{
	struct timespec64 diff, last;
	unsigned int len = skb->len;
	struct sk_buff *agg_skb;
	int size;

	spin_lock_bh(&port->agg_lock);
	memcpy(&last, &port->agg_last, sizeof(struct timespec64));
	ktime_get_real_ts64(&port->agg_last);

again:
	if (!port->skbagg_head) {
		/* Check to see if we should agg first. If the traffic is very
		 * sparse, don't aggregate.
		 */
		diff = timespec64_sub(port->agg_last, last);
		size = port->egress_agg_params.bytes - skb->len;

		if (size < 0) {
			/* dropped */
			spin_unlock_bh(&port->agg_lock);
			return 0;
		}

		if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
		    size == 0)
			goto no_aggr;

		/* The head is a private copy with enough tailroom reserved
		 * for the whole aggregate; the original skb is released.
		 */
		port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
		if (!port->skbagg_head)
			goto no_aggr;

		dev_kfree_skb_any(skb);
		port->skbagg_head->protocol = htons(ETH_P_MAP);
		port->agg_count = 1;
		ktime_get_real_ts64(&port->agg_time);
		skb_frag_list_init(port->skbagg_head);
		goto schedule;
	}
	diff = timespec64_sub(port->agg_last, port->agg_time);
	size = port->egress_agg_params.bytes - port->skbagg_head->len;

	/* size is negative when the byte limit was lowered below the length
	 * of the pending aggregate; test the sign explicitly because the
	 * comparison with skb->len is done in unsigned arithmetic.
	 */
	if (size < 0 || skb->len > size) {
		agg_skb = port->skbagg_head;
		reset_aggr_params(port);
		spin_unlock_bh(&port->agg_lock);
		hrtimer_cancel(&port->hrtimer);
		rmnet_send_skb(port, agg_skb);
		spin_lock_bh(&port->agg_lock);
		/* The lock was dropped: another context may have started a
		 * new aggregate in the meantime, so look at skbagg_head again
		 * instead of overwriting (and leaking) it.
		 */
		goto again;
	}

	if (skb_has_frag_list(port->skbagg_head))
		port->skbagg_tail->next = skb;
	else
		skb_shinfo(port->skbagg_head)->frag_list = skb;

	port->skbagg_head->len += skb->len;
	port->skbagg_head->data_len += skb->len;
	port->skbagg_head->truesize += skb->truesize;
	port->skbagg_tail = skb;
	port->agg_count++;

	/* Use >= for the limits: they can be lowered while an aggregate is
	 * pending, and an equality test would then never match.
	 */
	if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
	    port->agg_count >= port->egress_agg_params.count ||
	    port->skbagg_head->len >= port->egress_agg_params.bytes) {
		agg_skb = port->skbagg_head;
		reset_aggr_params(port);
		spin_unlock_bh(&port->agg_lock);
		hrtimer_cancel(&port->hrtimer);
		rmnet_send_skb(port, agg_skb);
		return len;
	}

schedule:
	if (!hrtimer_active(&port->hrtimer) && port->agg_state != -EINPROGRESS) {
		port->agg_state = -EINPROGRESS;
		hrtimer_start(&port->hrtimer,
			      ns_to_ktime(port->egress_agg_params.time_nsec),
			      HRTIMER_MODE_REL);
	}
	spin_unlock_bh(&port->agg_lock);

	return len;

no_aggr:
	/* Send the packet on its own, outside the lock. */
	spin_unlock_bh(&port->agg_lock);
	skb->protocol = htons(ETH_P_MAP);
	dev_queue_xmit(skb);

	return len;
}

/* Update the tx aggregation limits of @port.
 *
 * @port: port to configure
 * @size: maximum size of one aggregate, in bytes
 * @count: maximum number of packets per aggregate
 * @time: flush timeout, in microseconds
 *
 * The values are stored under agg_lock so the tx path sees a consistent set.
 */
void rmnet_map_update_ul_agg_config(struct rmnet_port *port, u32 size,
				    u32 count, u32 time)
{
	spin_lock_bh(&port->agg_lock);
	port->egress_agg_params.bytes = size;
	port->egress_agg_params.count = count;
	/* Widen before multiplying: NSEC_PER_USEC is a long, so on 32-bit
	 * builds the product would otherwise wrap before being stored in the
	 * 64-bit field.
	 */
	port->egress_agg_params.time_nsec = (u64)time * NSEC_PER_USEC;
	spin_unlock_bh(&port->agg_lock);
}

/* Prepare the tx aggregation machinery of a newly registered @port.
 *
 * Defaults: 4096 byte aggregates, 1 packet per aggregate and an 800 us flush
 * timeout. With a count of 1 the egress handler does not aggregate.
 */
void rmnet_map_tx_aggregate_init(struct rmnet_port *port)
{
	/* The lock has to exist before the defaults are stored under it. */
	spin_lock_init(&port->agg_lock);
	rmnet_map_update_ul_agg_config(port, 4096, 1, 800);

	/* Deferred flush: the timer callback only schedules this work. */
	INIT_WORK(&port->agg_wq, rmnet_map_flush_tx_packet_work);

	hrtimer_init(&port->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	port->hrtimer.function = rmnet_map_flush_tx_packet_queue;
}

/* Tear down the tx aggregation machinery of @port.
 *
 * The flush timer and the flush work are stopped first so that neither can
 * run again, then any aggregate still attached to the port is freed.
 */
void rmnet_map_tx_aggregate_exit(struct rmnet_port *port)
{
	hrtimer_cancel(&port->hrtimer);
	cancel_work_sync(&port->agg_wq);

	spin_lock_bh(&port->agg_lock);
	/* Free the pending aggregate whatever agg_state says: the tx path can
	 * start an aggregate without setting -EINPROGRESS when the previous
	 * timer is still active, and that buffer would otherwise be leaked.
	 */
	if (port->skbagg_head) {
		dev_kfree_skb_any(port->skbagg_head);
		reset_aggr_params(port);
	}

	port->agg_state = 0;
	spin_unlock_bh(&port->agg_lock);
}
9 changes: 7 additions & 2 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
Expand Up @@ -29,7 +29,7 @@ void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
u64_stats_update_end(&pcpu_ptr->syncp);
}

void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
void rmnet_vnd_tx_fixup_len(unsigned int len, struct net_device *dev)
{
struct rmnet_priv *priv = netdev_priv(dev);
struct rmnet_pcpu_stats *pcpu_ptr;
Expand All @@ -38,10 +38,15 @@ void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)

u64_stats_update_begin(&pcpu_ptr->syncp);
pcpu_ptr->stats.tx_pkts++;
pcpu_ptr->stats.tx_bytes += skb->len;
pcpu_ptr->stats.tx_bytes += len;
u64_stats_update_end(&pcpu_ptr->syncp);
}

/* Account the transmission of @skb on @dev, using the current skb length. */
void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int tx_len = skb->len;

	rmnet_vnd_tx_fixup_len(tx_len, dev);
}

/* Network Device Operations */

static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
Expand Up @@ -16,6 +16,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
struct rmnet_endpoint *ep);
void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
void rmnet_vnd_tx_fixup_len(unsigned int len, struct net_device *dev);
void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
void rmnet_vnd_setup(struct net_device *dev);
int rmnet_vnd_validate_real_dev_mtu(struct net_device *real_dev);
Expand Down

0 comments on commit b97e1bd

Please sign in to comment.