
Commit ca2f09f

Paul Durrant authored and davem330 committed
xen-netback: improve guest-receive-side flow control
The way that flow control works without this patch is that, in
start_xmit(), the code uses xenvif_count_skb_slots() to predict how many
slots xenvif_gop_skb() will consume and then adds this to a
'req_cons_peek' counter, which it then uses to determine whether the
shared ring has that amount of space available, by checking whether
'req_prod' has passed that value. If the ring doesn't have space, the tx
queue is stopped. xenvif_gop_skb() will then consume slots and update
'req_cons' and issue responses, updating 'rsp_prod' as it goes. The
frontend will consume those responses and post new requests, by updating
req_prod. So, req_prod chases req_cons, which chases rsp_prod, and can
never exceed that value. Thus if xenvif_count_skb_slots() ever returns a
number of slots greater than xenvif_gop_skb() uses, req_cons_peek will
get to a value that req_prod cannot possibly achieve (since it's limited
by the 'real' req_cons) and, if this happens enough times, req_cons_peek
gets more than a ring size ahead of req_cons and the tx queue then
remains stopped forever, waiting for an unachievable amount of space to
become available in the ring.

Having two routines trying to calculate the same value is always going
to be fragile, so this patch does away with that. All we essentially
need to do is make sure that we have 'enough stuff' on our internal
queue without letting it build up uncontrollably. So start_xmit() makes
a cheap optimistic check of how much space is needed for an skb and only
turns the queue off if that is unachievable. net_rx_action() is the
place where we could do with an accurate prediction but, since that has
proven tricky to calculate, a cheap worst-case (but not too bad)
estimate is all we really need, since the only thing we *must* prevent
is xenvif_gop_skb() consuming more slots than are available.

Without this patch I can trivially stall netback permanently by just
doing a large guest-to-guest file copy between two Windows Server 2008R2
VMs on a single host.

Patch tested with frontends in:
- Windows Server 2008R2
- CentOS 6.0
- Debian Squeeze
- Debian Wheezy
- SLES11

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
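[Note: the netback.c half of this commit, where the new availability
check is implemented, is not reproduced in this excerpt. As a rough
sketch of the idea described above (not the verbatim patch body), the
check compares the frontend's request producer against the backend's
request consumer and arms the ring's req_event when there is not yet
enough room:]

/* Sketch only: the real body lives in drivers/net/xen-netback/netback.c,
 * which is not shown in this excerpt.
 */
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
	RING_IDX prod, cons;

	do {
		prod = vif->rx.sring->req_prod;
		cons = vif->rx.req_cons;

		/* Enough posted-but-unconsumed requests for this skb? */
		if (prod - cons >= needed)
			return true;

		/* Not yet: ask the frontend for an event as soon as it
		 * posts another request, then re-check in case requests
		 * raced in while we were arming the event.
		 */
		vif->rx.sring->req_event = prod + 1;
		mb();
	} while (vif->rx.sring->req_prod != prod);

	return false;
}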
1 parent: 512137e

3 files changed, 106 insertions(+), 185 deletions(-)


drivers/net/xen-netback/common.h

Lines changed: 11 additions & 16 deletions
@@ -136,12 +136,10 @@ struct xenvif {
 	char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
-
-	/* Allow xenvif_start_xmit() to peek ahead in the rx request
-	 * ring. This is a prediction of what rx_req_cons will be
-	 * once all queued skbs are put on the ring.
+	/* Set when the RX interrupt is triggered by the frontend.
+	 * The worker thread may need to wake the queue.
 	 */
-	RING_IDX rx_req_cons_peek;
+	bool rx_event;
 
 	/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
 	 * head/fragment page uses 2 copy operations because it

@@ -198,8 +196,6 @@ void xenvif_xenbus_fini(void);
 
 int xenvif_schedulable(struct xenvif *vif);
 
-int xenvif_rx_ring_full(struct xenvif *vif);
-
 int xenvif_must_stop_queue(struct xenvif *vif);
 
 /* (Un)Map communication rings. */

@@ -211,21 +207,20 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
 /* Check for SKBs from frontend and schedule backend processing */
 void xenvif_check_rx_xenvif(struct xenvif *vif);
 
-/* Queue an SKB for transmission to the frontend */
-void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
-/* Notify xenvif that ring now has space to send an skb to the frontend */
-void xenvif_notify_tx_completion(struct xenvif *vif);
-
 /* Prevent the device from generating any further traffic. */
 void xenvif_carrier_off(struct xenvif *vif);
 
-/* Returns number of ring slots required to send an skb to the frontend */
-unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
-
 int xenvif_tx_action(struct xenvif *vif, int budget);
-void xenvif_rx_action(struct xenvif *vif);
 
 int xenvif_kthread(void *data);
+void xenvif_kick_thread(struct xenvif *vif);
+
+/* Determine whether the needed number of slots (req) are available,
+ * and set req_event if not.
+ */
+bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
+
+void xenvif_stop_queue(struct xenvif *vif);
 
 extern bool separate_tx_rx_irq;
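[Note: the header now pairs the new rx_event flag with
xenvif_kick_thread() and xenvif_stop_queue(). Their consumer is the
per-vif worker thread in netback.c, which this excerpt omits. A
plausible shape for that loop is sketched below; rx_work_todo() and
xenvif_start_queue() are assumed netback.c helpers, not part of the
diff shown above:]

/* Sketch of the worker loop; rx_work_todo() and xenvif_start_queue()
 * are assumed helpers, not part of the diff shown above.
 */
int xenvif_kthread(void *data)
{
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
					 rx_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		/* Drain whatever start_xmit() has queued internally. */
		if (!skb_queue_empty(&vif->rx_queue))
			xenvif_rx_action(vif);

		/* An rx_event means the frontend posted new requests, so
		 * a queue stopped in start_xmit() may now have room.
		 */
		vif->rx_event = false;
		if (skb_queue_empty(&vif->rx_queue) &&
		    netif_queue_stopped(vif->dev))
			xenvif_start_queue(vif);

		cond_resched();
	}

	return 0;
}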

drivers/net/xen-netback/interface.c

Lines changed: 24 additions & 23 deletions
@@ -46,11 +46,6 @@ int xenvif_schedulable(struct xenvif *vif)
 	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
 }
 
-static int xenvif_rx_schedulable(struct xenvif *vif)
-{
-	return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif);
-}
-
 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif *vif = dev_id;

@@ -104,8 +99,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif *vif = dev_id;
 
-	if (xenvif_rx_schedulable(vif))
-		netif_wake_queue(vif->dev);
+	vif->rx_event = true;
+	xenvif_kick_thread(vif);
 
 	return IRQ_HANDLED;
 }

@@ -121,24 +116,35 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	int min_slots_needed;
 
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL)
+	if (vif->task == NULL || !xenvif_schedulable(vif))
 		goto drop;
 
-	/* Drop the packet if the target domain has no receive buffers. */
-	if (!xenvif_rx_schedulable(vif))
-		goto drop;
+	/* At best we'll need one slot for the header and one for each
+	 * frag.
+	 */
+	min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
 
-	/* Reserve ring slots for the worst-case number of fragments. */
-	vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb);
+	/* If the skb is GSO then we'll also need an extra slot for the
+	 * metadata.
+	 */
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
+	    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+		min_slots_needed++;
 
-	if (vif->can_queue && xenvif_must_stop_queue(vif))
-		netif_stop_queue(dev);
+	/* If the skb can't possibly fit in the remaining slots
+	 * then turn off the queue to give the ring a chance to
+	 * drain.
+	 */
+	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+		xenvif_stop_queue(vif);
 
-	xenvif_queue_tx_skb(vif, skb);
+	skb_queue_tail(&vif->rx_queue, skb);
+	xenvif_kick_thread(vif);
 
 	return NETDEV_TX_OK;
 

@@ -148,12 +154,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-void xenvif_notify_tx_completion(struct xenvif *vif)
-{
-	if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
-		netif_wake_queue(vif->dev);
-}
-
 static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);

@@ -378,6 +378,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	if (err < 0)
 		goto err;
 
+	init_waitqueue_head(&vif->wq);
+
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
 		err = bind_interdomain_evtchn_to_irqhandler(

@@ -410,7 +412,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		disable_irq(vif->rx_irq);
 	}
 
-	init_waitqueue_head(&vif->wq);
 	task = kthread_create(xenvif_kthread,
 			      (void *)vif, "%s", vif->dev->name);
 	if (IS_ERR(task)) {
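[Note: for concreteness, the optimistic estimate made in
xenvif_start_xmit() above can be read as a standalone calculation.
The helper below is illustrative only and not part of the patch;
nr_frags and is_gso stand in for skb_shinfo(skb)->nr_frags and the
SKB_GSO_TCPV4/SKB_GSO_TCPV6 test:]

/* Illustrative only, not part of the patch: the cheap lower-bound
 * slot estimate made by xenvif_start_xmit().
 */
static int min_slots_estimate(int nr_frags, bool is_gso)
{
	int slots = 1 + nr_frags;	/* header + one per frag */

	if (is_gso)
		slots++;		/* extra slot for GSO metadata */

	return slots;			/* e.g. 3 frags + GSO => 5 */
}

Because this estimate is deliberately a lower bound rather than a
worst case, it can never demand more ring space than an skb could
actually use, so the permanent stall described in the commit message
cannot recur; the hard guarantee, that xenvif_gop_skb() never consumes
more slots than are available, is enforced separately in netback.c.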
