// virtio-net.hh
/*
* Copyright (C) 2013 Cloudius Systems, Ltd.
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
*/
#ifndef VIRTIO_NET_DRIVER_H
#define VIRTIO_NET_DRIVER_H
#include <bsd/porting/netport.h>
#include <bsd/sys/net/if_var.h>
#include <bsd/sys/net/if.h>
#include <bsd/sys/sys/mbuf.h>
#include <osv/percpu_xmit.hh>
#include "drivers/virtio.hh"
#include "drivers/pci-device.hh"
namespace virtio {
/**
* @class net
* virtio net device class
*/
class net : public virtio_driver {
public:
// Feature bits of the virtio-net feature bitmap.
// Every enumerator is the number of a bit inside that bitmap.
enum NetFeatures {
    VIRTIO_NET_F_CSUM           = 0,   // Host handles pkts w/ partial csum
    VIRTIO_NET_F_GUEST_CSUM     = 1,   // Guest handles pkts w/ partial csum
    VIRTIO_NET_F_MAC            = 5,   // Host has given MAC address
    VIRTIO_NET_F_GSO            = 6,   // Host handles pkts w/ any GSO type

    // What the guest is able to receive
    VIRTIO_NET_F_GUEST_TSO4     = 7,   // Guest can handle TSOv4 in
    VIRTIO_NET_F_GUEST_TSO6     = 8,   // Guest can handle TSOv6 in
    VIRTIO_NET_F_GUEST_ECN      = 9,   // Guest can handle TSO[6] w/ ECN in
    VIRTIO_NET_F_GUEST_UFO      = 10,  // Guest can handle UFO in

    // What the host is able to receive
    VIRTIO_NET_F_HOST_TSO4      = 11,  // Host can handle TSOv4 in
    VIRTIO_NET_F_HOST_TSO6      = 12,  // Host can handle TSOv6 in
    VIRTIO_NET_F_HOST_ECN       = 13,  // Host can handle TSO[6] w/ ECN in
    VIRTIO_NET_F_HOST_UFO       = 14,  // Host can handle UFO in

    VIRTIO_NET_F_MRG_RXBUF      = 15,  // Host can merge receive buffers
    VIRTIO_NET_F_STATUS         = 16,  // net_config.status available

    // Control channel related features
    VIRTIO_NET_F_CTRL_VQ        = 17,  // Control channel available
    VIRTIO_NET_F_CTRL_RX        = 18,  // Control channel RX mode support
    VIRTIO_NET_F_CTRL_VLAN      = 19,  // Control channel VLAN filtering
    VIRTIO_NET_F_CTRL_RX_EXTRA  = 20,  // Extra RX mode control support
    VIRTIO_NET_F_GUEST_ANNOUNCE = 21,  // Guest can announce device on the network
    VIRTIO_NET_F_MQ             = 22,  // Device supports Receive Flow Steering
    VIRTIO_NET_F_CTRL_MAC_ADDR  = 23,  // Set MAC address
};
/*
 * Miscellaneous virtio-net constants: the device id, the link status values,
 * the OK/ERR result codes and the class/command numbers used on the control
 * virtqueue.  Command numbers restart from 0 inside every class, which is why
 * several enumerators share the same value.
 */
enum {
    VIRTIO_NET_DEVICE_ID = 0x1000,

    /* Link status values (VIRTIO_NET_S_*), see net_config::status */
    VIRTIO_NET_S_LINK_UP = 1,       /* Link is up */
    VIRTIO_NET_S_ANNOUNCE = 2,      /* Announcement is needed */

    /* Result codes; presumably the values carried by net_ctrl_ack */
    VIRTIO_NET_OK = 0,
    VIRTIO_NET_ERR = 1,

    /*
     * Control the RX mode, i.e. promiscuous, allmulti, etc...
     * All commands require an "out" sg entry containing a 1 byte
     * state value, zero = disable, non-zero = enable.  Commands
     * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
     * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
     */
    VIRTIO_NET_CTRL_RX = 0,
    VIRTIO_NET_CTRL_RX_PROMISC = 0,
    VIRTIO_NET_CTRL_RX_ALLMULTI = 1,
    VIRTIO_NET_CTRL_RX_ALLUNI = 2,
    VIRTIO_NET_CTRL_RX_NOMULTI = 3,
    VIRTIO_NET_CTRL_RX_NOUNI = 4,
    VIRTIO_NET_CTRL_RX_NOBCAST = 5,

    /* Control the MAC filter table / MAC address (see net_ctrl_mac) */
    VIRTIO_NET_CTRL_MAC = 1,
    VIRTIO_NET_CTRL_MAC_TABLE_SET = 0,
    VIRTIO_NET_CTRL_MAC_ADDR_SET = 1,

    /*
     * Control VLAN filtering
     *
     * The VLAN filter table is controlled via a simple ADD/DEL interface.
     * VLAN IDs not added may be filtered by the hypervisor.  Del is the
     * opposite of add.  Both commands expect an out entry containing a 2
     * byte VLAN ID.  VLAN filtering is available with the
     * VIRTIO_NET_F_CTRL_VLAN feature bit.
     */
    VIRTIO_NET_CTRL_VLAN = 2,
    VIRTIO_NET_CTRL_VLAN_ADD = 0,
    VIRTIO_NET_CTRL_VLAN_DEL = 1,

    /*
     * Control link announce acknowledgement
     *
     * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
     * driver has received the notification; device would clear the
     * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
     * this command.
     */
    VIRTIO_NET_CTRL_ANNOUNCE = 3,
    VIRTIO_NET_CTRL_ANNOUNCE_ACK = 0,

    /* Control Receive Flow Steering (see net_ctrl_mq) */
    VIRTIO_NET_CTRL_MQ = 4,
    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET = 0,
    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN = 1,
    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX = 0x8000,

    /*
     * Length in bytes of an Ethernet (MAC) address.  This used to be 14,
     * which is the length of an Ethernet *header*; with that value the rows
     * of net_ctrl_mac::macs were 14 bytes wide instead of the 6 byte
     * addresses that net_config::mac and the MAC control commands use.
     */
    ETH_ALEN = 6,

    /* Checksum offload kinds handled by this driver */
    VIRTIO_NET_CSUM_OFFLOAD = CSUM_TCP | CSUM_UDP,
};
/*
 * virtio-net configuration block: MAC address, link status and the
 * multiqueue limit.  The struct is declared packed, so the members follow
 * each other without padding.
 */
struct net_config {
/* The config defining mac address (if VIRTIO_NET_F_MAC) */
u8 mac[6];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
u16 status;
/* Maximum number of each of transmit and receive queues;
* see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
* Legal values are between 1 and 0x8000
*/
u16 max_virtqueue_pairs;
} __attribute__((packed));
/*
 * Per-packet virtio-net header.
 *
 * This is the first element of the scatter-gather list.  If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 */
struct net_hdr {
/* VIRTIO_NET_HDR_F_*: presumably the bits that may be set in `flags` */
enum {
VIRTIO_NET_HDR_F_NEEDS_CSUM = 1, // Use csum_start, csum_offset
VIRTIO_NET_HDR_F_DATA_VALID = 2, // Csum is valid
};
u8 flags;
/* VIRTIO_NET_HDR_GSO_*: presumably the values stored in `gso_type` */
enum {
VIRTIO_NET_HDR_GSO_NONE = 0, // Not a GSO frame
VIRTIO_NET_HDR_GSO_TCPV4 = 1, // GSO frame, IPv4 TCP (TSO)
VIRTIO_NET_HDR_GSO_UDP = 3, // GSO frame, IPv4 UDP (UFO)
VIRTIO_NET_HDR_GSO_TCPV6 = 4, // GSO frame, IPv6 TCP
VIRTIO_NET_HDR_GSO_ECN = 0x80, // TCP has ECN set
};
u8 gso_type;
u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
u16 gso_size; /* Bytes to append to hdr_len per frame */
u16 csum_start; /* Position to start checksumming from */
u16 csum_offset; /* Offset after that to place checksum */
};
/*
 * This is the version of the header to use when the MRG_RXBUF
 * feature has been negotiated: the plain net_hdr followed by a buffer count.
 */
struct net_hdr_mrg_rxbuf {
struct net_hdr hdr;
u16 num_buffers; /* Number of merged rx buffers */
};
/*
 * Control virtqueue data structures
 *
 * The control virtqueue expects a header in the first sg entry
 * and an ack/status response in the last entry.  Data for the
 * command goes in between.
 */
struct net_ctrl_hdr {
/* Command class; presumably one of the VIRTIO_NET_CTRL_{RX,MAC,VLAN,ANNOUNCE,MQ} values */
u8 class_t;
/* Command number inside that class */
u8 cmd;
} __attribute__((packed));
/* Type of the ack/status response entry (a single byte) */
typedef u8 net_ctrl_ack;
/*
 * Control the MAC
 *
 * The MAC filter table is managed by the hypervisor, the guest should
 * assume the size is infinite.  Filtering should be considered
 * non-perfect, i.e. based on hypervisor resources, the guest may
 * receive packets from sources not specified in the filter list.
 *
 * In addition to the class/cmd header, the TABLE_SET command requires
 * two out scatterlists.  Each contains a 4 byte count of entries followed
 * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
 * first sg list contains unicast addresses, the second is for multicast.
 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 * is available.
 *
 * The ADDR_SET command requests one out scatterlist, it contains a
 * 6 bytes MAC address.  This functionality is present if the
 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 */
struct net_ctrl_mac {
/* Number of addresses that follow in macs[] */
u32 entries;
/* Flexible array of addresses; every row is ETH_ALEN bytes wide */
u8 macs[][ETH_ALEN];
} __attribute__((packed));
/*
 * Control Receive Flow Steering
 *
 * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
 * enables Receive Flow Steering, specifying the number of the transmit and
 * receive queues that will be used.  After the command is consumed and acked
 * by the device, the device will not steer new packets on receive virtqueues
 * other than specified nor read from transmit virtqueues other than
 * specified.  Accordingly, driver should not transmit new packets on
 * virtqueues other than specified.
 */
struct net_ctrl_mq {
/* Number of Tx/Rx virtqueue pairs to use */
u16 virtqueue_pairs;
};
/* Constructs the driver object for the PCI device `dev`. */
explicit net(pci::device& dev);
virtual ~net();
/* Returns the driver name stored in _driver_name. */
virtual std::string get_name() const { return _driver_name; }
/* NOTE(review): presumably reads the device configuration (see net_config
 * and the _config member); the implementation is not part of this header. */
void read_config();
/* Returns the driver's feature word; presumably built from the NetFeatures
 * bit numbers - confirm against the implementation. */
virtual u32 get_driver_features();
/* NOTE(review): judging by the name, blocks until `queue` has work to
 * process - confirm against the implementation. */
void wait_for_queue(vring* queue);
/* Rx checksum check for the packet `m` received with virtio header `hdr`;
 * presumably returns true when the checksum is bad. */
bool bad_rx_csum(struct mbuf* m, struct net_hdr* hdr);
/* Rx processing routine; presumably the function run by the Rx poll thread
 * (see rxq::poll_task). */
void receiver();
/* NOTE(review): presumably posts fresh receive buffers to the Rx vring. */
void fill_rx_ring();
/* Builds an mbuf out of the buffers described by `iovec`. */
mbuf* packet_to_mbuf(const std::vector<iovec>& iovec);
/* Buffer release helpers.  free_buffer() simply hands iov.iov_base to
 * do_free_buffer(); the (void*, void*) signature of
 * free_buffer_and_refcnt() suggests it is used as a callback. */
static void free_buffer_and_refcnt(void* buffer, void* refcnt);
static void free_buffer(iovec iov) { do_free_buffer(iov.iov_base); }
static void do_free_buffer(void* buffer);
/* Interrupt acknowledge hook; the meaning of the returned bool is defined
 * by the implementation. */
bool ack_irq();
/* Probe entry point: returns a driver object for `dev`; presumably nullptr
 * when the device is not a virtio-net device. */
static hw_driver* probe(hw_device* dev);
/**
* Fill the if_data buffer with data from our iface including those that
* we have gathered by ourselves (e.g. FP queue stats).
* @param out_data output buffer
*/
void fill_stats(struct if_data* out_data) const;
/**
* Transmit a single frame.
*
* @note This function may sleep!
* @param buff frame to transmit
*
* @return 0 in case of success, EINVAL in case the frame is not
* well-formed.
*/
int xmit(mbuf* buff);
private:
/*
 * Tx request handle: everything the Tx path keeps for one packet while it
 * is being transmitted - the virtio header, the mbuf being sent and some
 * per-packet accounting.
 */
struct net_req {
    /*
     * Start every field from a known value: the header is zeroed, and
     * tx_bytes (previously left uninitialized) starts at 0 so that it can
     * never be read as garbage before the Tx path fills it in.
     */
    explicit net_req(mbuf *m) : mb(m), tx_bytes(0), hw_queue_was_full(0) {
        memset(&mhdr, 0, sizeof(mhdr));
    }

    struct net::net_hdr_mrg_rxbuf mhdr; /* virtio header for this packet */
    mbuf* mb;                           /* packet to transmit */
    u64 tx_bytes;                       /* byte count used for Tx accounting */
    int hw_queue_was_full;              /* presumably set when the HW ring had no room */
};
/* Name returned by get_name() */
std::string _driver_name;
/* Device configuration (see net_config) */
net_config _config;
/*
 * Feature / offload flags.  All of them start out false; _mergeable_bufs
 * used to be the only one without a default initializer.
 * NOTE(review): presumably set from the negotiated features - confirm
 * against the implementation.
 */
bool _mergeable_bufs = false;
bool _tso_ecn = false;
bool _status = false;
bool _host_tso_ecn = false;
bool _csum = false;
bool _guest_csum = false;
bool _guest_tso4 = false;
bool _host_tso4 = false;
bool _guest_ufo = false;
/*
 * Presumably the size of the virtio header in use (net_hdr or
 * net_hdr_mrg_rxbuf) - confirm.  Starts at 0 instead of an indeterminate
 * value.
 */
u32 _hdr_size = 0;
std::unique_ptr<pci_interrupt> _irq;
/*
 * Counters kept for an Rx queue.  The if_* names in the trailing comments
 * are presumably the if_data fields these counters are reported through.
 */
struct rxq_stats {
u64 rx_packets; /* if_ipackets */
u64 rx_bytes; /* if_ibytes */
u64 rx_drops; /* if_iqdrops */
u64 rx_csum; /* number of packets with correct csum */
u64 rx_csum_err;/* number of packets with a bad checksum */
u64 rx_bh_wakeups;
/* Updated through rxq::update_wakeup_stats() */
wakeup_stats rx_wakeup_stats;
};
/*
 * Counters kept for a Tx queue.  The if_* names in the trailing comments
 * are presumably the if_data fields these counters are reported through.
 */
struct txq_stats {
u64 tx_packets; /* if_opackets */
u64 tx_bytes; /* if_obytes */
u64 tx_err; /* Number of broken packets */
u64 tx_drops; /* Number of dropped packets */
u64 tx_csum; /* CSUM offload requests */
u64 tx_tso; /* GSO/TSO packets */
/* u64 tx_rescheduled; */ /* TODO when we implement xoff */
u64 tx_worker_kicks;
u64 tx_kicks;
u64 tx_worker_wakeups;
u64 tx_worker_packets;
u64 tx_hw_queue_is_full;
/* Updated through txq::update_wakeup_stats() */
wakeup_stats tx_wakeup_stats;
};
/*
 * Single Rx queue object: the Rx vring together with the thread that polls
 * it and the counters collected for it.
 */
struct rxq {
    /*
     * Remembers the vring and builds the "virtio-net-rx" thread around
     * poll_func via sched::thread::make().
     */
    rxq(vring* vq, std::function<void ()> poll_func)
        : vqueue(vq)
        , poll_task(sched::thread::make(
              poll_func,
              sched::thread::attr().name("virtio-net-rx")))
    {
    }

    vring* vqueue;
    std::unique_ptr<sched::thread> poll_task;
    struct rxq_stats stats = { 0 };

    /* Forwards wakeup_packets to if_update_wakeup_stats() for the Rx side. */
    void update_wakeup_stats(const u64 wakeup_packets)
    {
        if_update_wakeup_stats(stats.rx_wakeup_stats, wakeup_packets);
    }
};
/**
 * @class txq
 * A single Tx queue object.
 *
 * TODO: Make it a class!
 */
struct txq {
    friend osv::xmitter_functor<txq>;

    /**
     * @param parent parent net device
     * @param vq     vring used by this queue
     *
     * The kick threshold is initialized to the size of the ring, i.e. we
     * kick at least every full ring of packets.
     *
     * Otherwise a deadlock is possible:
     *   1) We post a full ring of buffers without a kick().
     *   2) We block on posting of the next buffer.
     *   3) HW doesn't know there is a work to do.
     *   4) Dead lock.
     */
    txq(net* parent, vring* vq)
        : vqueue(vq)
        , _parent(parent)
        , _xmit_it(this)
        , _kick_thresh(vqueue->size())
        , _xmitter(this,
                   // TODO: implement a proper StopPred when we fix a SP code
                   [] { return false; },
                   _xmit_it, "virtio-tx")
    {
    }

    /**
     * Checks the packet and hands back its net_req through "cooky".
     * @param m_head packet to check
     * @param cooky  receives the net_req for the packet
     *
     * @return 0 if packet is ok and EINVAL if it's not well-formed.
     */
    int xmit_prep(mbuf* m_head, void*& cooky);

    /**
     * Try to transmit a single packet. Don't block on failure.
     *
     * Must run with "running" lock taken.
     * In case of a success this function will update Tx statistics.
     * @param cooky Cooky returned by xmit_prep().
     *
     * @return 0 if packet has been successfully sent and ENOBUFS if there
     *         was no room on a HW ring to send the packet.
     */
    int try_xmit_one_locked(void* cooky);

    /**
     * Kick the vqueue if number of pending packets has reached the given
     * threshold.
     *
     * @param thresh threshold
     */
    void kick_pending(u16 thresh = 1);

    /** Same as kick_pending(), using this queue's own kick threshold. */
    void kick_pending_with_thresh()
    {
        kick_pending(_kick_thresh);
    }

    /**
     * Kick the underlying vring.
     *
     * @return TRUE if the vring has been actually indicated.
     */
    bool kick_hw();

    int xmit(mbuf* m_head);

    /** Forwards wakeup_packets to if_update_wakeup_stats() for the Tx side. */
    void update_wakeup_stats(const u64 wakeup_packets)
    {
        if_update_wakeup_stats(stats.tx_wakeup_stats, wakeup_packets);
    }

    /** Starts the xmitter of this queue. */
    void start()
    {
        _xmitter.start();
    }

    /** @return size of the underlying vring. */
    int qsize()
    {
        return vqueue->size();
    }

    /* TODO: drain the per-cpu rings in ~txq() and in if_qflush() */
    vring* vqueue;
    txq_stats stats = { 0 };

private:
    /**
     * This is a private version of try_xmit_one_locked() that actually does
     * the work.
     * This function won't update Tx statistics - the caller should do this
     * after the packet is actually sent.
     * @param req Tx request handle
     *
     * @return 0 if packet has been successfully sent and ENOBUFS if there
     *         was no room on a HW ring to send the packet.
     */
    int try_xmit_one_locked(net_req* req);

    /**
     * Transmit a single packet. Will wait for completions if there is no
     * room on a HW ring.
     *
     * Must run with "running" lock taken.
     * @param req Tx request handle
     */
    void xmit_one_locked(void* req);

    /**
     * Free the descriptors for the completed packets.
     */
    void gc();

    /**
     * Update the packet handle and the net_hdr according to various offload
     * features.
     * @param m   Tx packet handle
     * @param hdr net_hdr to update
     *
     * @return The updated Tx packet handle. If packet wasn't well-formed
     *         nullptr will be returned.
     */
    mbuf* offload(mbuf* m, net_hdr* hdr);

    /**
     * Update Tx stats for a single packet in case of a successful xmit.
     * @param req Appropriate net_req for this packet (we need its mhdr)
     */
    void update_stats(net_req* req);

    net* _parent;
    osv::tx_xmit_iterator<txq> _xmit_it;
    const int _kick_thresh;
    u16 _pkts_to_kick = 0;

    //
    // 4096 is the size of the buffers ring of the FreeBSD virtio-net
    // driver. So, we are using this as a baseline. We may adjust this value
    // later (cut it down maybe?!).
    //
    // Currently this gives us ~16 pages per one CPU ring.
    //
    osv::xmitter<txq,
                 4096,
                 std::function<bool ()>,
                 osv::tx_xmit_iterator<txq>> _xmitter;
};
/**
* Fill the Rx queue statistics in the general info struct
* @param rxq Rx queue handle
* @param out_data output buffer
*/
void fill_qstats(const struct rxq& rxq, struct if_data* out_data) const;
/**
* Fill the Tx queue statistics in the general info struct
* @param txq Tx queue handle
* @param out_data output buffer
*/
void fill_qstats(const struct txq& txq, struct if_data* out_data) const;
/* We currently support only a single Rx+Tx queue */
struct rxq _rxq;
struct txq _txq;
// Maintains the virtio instance number when there are multiple devices.
// NOTE(review): the original comment said "drives", which looks like it was
// copied from a block driver - confirm the intended wording.
static int _instance;
// Presumably this instance's own number, taken from _instance - confirm.
int _id;
// Network interface object associated with this device.
struct ifnet* _ifn;
};
}
#endif