-
-
Notifications
You must be signed in to change notification settings - Fork 605
/
Copy pathvirtio-vring.hh
291 lines (239 loc) · 9.93 KB
/
virtio-vring.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/*
* Copyright (C) 2013 Cloudius Systems, Ltd.
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
*/
#ifndef VIRTIO_VRING_H
#define VIRTIO_VRING_H
#include <atomic>
#include <functional>
#include <osv/mutex.h>
#include <osv/debug.hh>
#include <osv/mmu.hh>
#include <osv/sched.hh>
#define virtio_tag "virtio"
#define virtio_d(...) tprintf_d(virtio_tag, __VA_ARGS__)
#define virtio_i(...) tprintf_i(virtio_tag, __VA_ARGS__)
#define virtio_w(...) tprintf_w(virtio_tag, __VA_ARGS__)
#define virtio_e(...) tprintf_e(virtio_tag, __VA_ARGS__)
static TRACEPOINT(trace_vring_get_buf_finalize, "vring=%p: _used_ring_host_head %d",
void*, int);
static TRACEPOINT(trace_vring_update_used_event, "vring=%p: _used_ring_host_head %d",
void*, int);
namespace virtio {
class virtio_vring;
class virtio_driver;
// Buffer descriptors in the ring
// A single entry in the descriptor table shared with the host.
//
// The member order, widths and packing below are dictated by the
// virtio specification and must not be altered.
class vring_desc {
public:
    enum flags {
        // Buffer is read-only for the device (the default, no bit set)
        VRING_DESC_F_READ=0,
        // The chain continues at the descriptor indexed by _next
        VRING_DESC_F_NEXT=1,
        // Buffer is write-only for the device (otherwise read-only)
        VRING_DESC_F_WRITE=2,
        // Buffer holds a table of further descriptors (indirect)
        VRING_DESC_F_INDIRECT=4
    };

    // Length in bytes of the buffer this descriptor points at
    u32 get_len() { return _len; }
    // Index of the following descriptor, valid when is_chained()
    u16 next_idx() { return _next; }

    // Flag queries
    bool is_chained() { return (_flags & VRING_DESC_F_NEXT) != 0; }
    bool is_write() { return (_flags & VRING_DESC_F_WRITE) != 0; }
    bool is_indirect() { return (_flags & VRING_DESC_F_INDIRECT) != 0; }

    u64 _paddr;   // guest-physical address of the buffer
    u32 _len;     // buffer length in bytes
    u16 _flags;   // combination of the flags above
    u16 _next;    // index of the next descriptor in the chain
};
// Guest to host
// Guest to host: the "available" ring, through which the guest offers
// descriptor chains to the device. Layout is fixed by the virtio spec.
class vring_avail {
public:
    enum {
        // Tell the host we do not need an interrupt when it consumes
        // a descriptor from this ring. Unreliable, so it is simply an
        // optimization hint
        VRING_AVAIL_F_NO_INTERRUPT=1
    };

    void disable_interrupt() { _flags.store(VRING_AVAIL_F_NO_INTERRUPT, std::memory_order_relaxed); }
    void enable_interrupt() { _flags.store(0, std::memory_order_relaxed); }
    bool interrupt_on() { return !(_flags.load(std::memory_order_relaxed) & VRING_AVAIL_F_NO_INTERRUPT); }

    std::atomic<u16> _flags;
    // Index of the slot where the next descriptor head will be placed
    std::atomic<u16> _idx;
    // Flexible array of descriptor heads; its length is the queue size
    // read from the device
    u16 _ring[];
    // used_event is an interrupt-coalescing optimization: the host only
    // interrupts once the used index reaches this value.
    // The field is placed after the variable-length _ring[] array, so it
    // cannot be declared inside the struct; it is reached through a
    // function accessor instead.
    //std::atomic<u16> used_event;
};
// One entry of the used ring: identifies a descriptor chain the host
// has finished with and how many bytes of it were written. Layout is
// shared with the host and must not change.
class vring_used_elem {
public:
    // Index of start of used vring_desc chain. (u32 for padding reasons)
    u32 _id;
    // Total length of the descriptor chain which was used (written to)
    u32 _len;
};
// Host to guest
// Host to guest: the "used" ring, through which the device hands back
// completed descriptor chains. Layout is fixed by the virtio spec.
class vring_used {
public:
    enum {
        // Host's advice to the guest: don't kick (notify) me when you
        // add a buffer. Unreliable, so it is simply an optimization;
        // the guest still kicks when it runs out of buffers.
        VRING_USED_F_NO_NOTIFY=1
    };

    // True when the host asked us to skip the notification doorbell
    bool notifications_disabled() {
        return (_flags.load(std::memory_order_relaxed) & VRING_USED_F_NO_NOTIFY) == VRING_USED_F_NO_NOTIFY;
    }

    // std::atomic because the host changes this field concurrently
    std::atomic<u16> _flags;
    // std::atomic so accesses get the required memory barriers
    std::atomic<u16> _idx;
    vring_used_elem _used_elements[];
    // avail_event is a kick-coalescing optimization: the guest only
    // needs to kick the host once the avail index reaches this value.
    // The field is placed after the variable-length _used_elements[]
    // array, so it cannot be declared inside the struct; it is reached
    // through a function accessor instead.
    //std::atomic<u16> avail_event;
};
// A single virtqueue: wraps the shared descriptor table, available
// ring and used ring, together with the guest-side bookkeeping needed
// to add buffers, reap completions and notify (kick) the host.
class vring {
public:
    vring(virtio_driver* const driver, u16 num, u16 q_index);
    virtual ~vring();

    // Physical address of the vring allocation, handed to the device
    u64 get_paddr();
    // Bytes required for a ring of num descriptors at the given alignment
    static unsigned get_size(unsigned int num, unsigned long align);
    u64 get_desc_addr();
    u64 get_avail_addr();
    u64 get_used_addr();

    // Ring operations
    // Post the scatter-gather list built up in _sg_vec, remembering
    // cookie so it can be handed back when the request completes
    bool add_buf(void* cookie);
    // Get the top item from the used ring; *len receives the number of
    // bytes the host wrote
    void* get_buf_elem(u32* len);
    /**
     * Increment the _used_ring_host_head and doorbell if requested.
     *
     * If requested let the host know we consumed the used entry.
     * We separate that from get_buf_elem so no one will re-cycle the
     * request header location until we're finished with it in the upper
     * layer.
     *
     * @param update_host if TRUE - update the host as well
     */
    __attribute__((always_inline)) inline // Necessary because of issue #1029
    void get_buf_finalize(bool update_host = true) {
        _used_ring_host_head++;
        trace_vring_get_buf_finalize(this, _used_ring_host_head);
        if (update_host) {
            update_used_event();
        }
    }

    // Publish our used index (used_event) so the host can coalesce
    // interrupts; skipped entirely while interrupts are disabled.
    __attribute__((always_inline)) inline // Necessary because of issue #1029
    void update_used_event() {
        // only let the host know about our used idx in case irq are enabled
        if (_avail->interrupt_on()) {
            trace_vring_update_used_event(this, _used_ring_host_head);
            set_used_event(_used_ring_host_head, std::memory_order_release);
        }
    }

    // GC the used items that were already read to be emptied
    // within the ring. Should be called by add_buf
    // It was separated from the get_buf flow to allow parallelism of the two
    void get_buf_gc();

    // Slots usable by the guest right now: the free descriptor count
    // plus used entries seen from the host but not yet reclaimed by
    // get_buf_gc (_used_ring_host_head runs ahead of _used_ring_guest_head)
    inline u16 effective_avail_ring_count()
    {
        return _avail_count + (_used_ring_host_head - _used_ring_guest_head);
    }

    bool used_ring_not_empty() const;
    bool used_ring_is_half_empty() const;
    bool used_ring_can_gc() const;
    bool avail_ring_not_empty();
    // when the available ring has x descriptors as room it means that
    // x descriptors can be allocated while _num-x are available for the host
    bool avail_ring_has_room(int n);
    // Decide whether a request needing desc_needed descriptors should
    // go through an indirect descriptor table
    bool use_indirect(int desc_needed);
    void set_use_indirect(bool flag) { _use_indirect = flag;}
    bool get_use_indirect() { return _use_indirect;}
    // Ring the doorbell: notify the host that new buffers are available
    bool kick();
    // Total number of descriptors in ring
    int size() {return _num;}
    // Index of this queue within the device
    u16 index() {return _q_index; }

    // Use memory order acquire when there are prior updates to local variables that must
    // be seen by the reading threads
    void set_used_event(u16 event, std::memory_order order) {_used_event->store(event, order);};

    // Let host know about interrupt delivery
    void disable_interrupts();
    void enable_interrupts();

    // Upper bound on scatter-gather nodes per request
    const int max_sgs = 256;

    // One physically-contiguous segment of a scatter-gather list
    struct sg_node {
        u64 _paddr;
        u32 _len;
        u16 _flags;
        sg_node(u64 addr, u32 len, u16 flags=0) :_paddr(addr), _len(len), _flags(flags) {};
        sg_node(const sg_node& n) :_paddr(n._paddr), _len(n._len), _flags(n._flags) {};
    };

    // Reset the pending scatter-gather list before building a request
    void init_sg()
    {
        _sg_vec.clear();
    }
    // Append a device-readable (guest-to-host) segment
    void add_out_sg(void* vaddr, u32 len)
    {
        add_sg(vaddr, len, vring_desc::VRING_DESC_F_READ);
    }
    // Append a device-writable (host-to-guest) segment
    void add_in_sg(void* vaddr, u32 len)
    {
        add_sg(vaddr, len, vring_desc::VRING_DESC_F_WRITE);
    }
    // Translate [vaddr, vaddr+len) into physically-contiguous runs and
    // append each as an sg_node carrying desc_flags
    void add_sg(void* vaddr, u32 len, vring_desc::flags desc_flags) {
        mmu::virt_to_phys(vaddr, len, [this, desc_flags] (mmu::phys paddr, size_t len) {
            _sg_vec.emplace_back(paddr, len, desc_flags);
        });
    }

    // Like add_buf, but blocks the caller until the ring has room
    void add_buf_wait(void* cookie);

    // Wake a thread blocked in add_buf_wait
    void wakeup_waiter()
    {
        _waiter.wake();
    }

    // holds a temporary sg_nodes that travel between the upper layer virtio to add_buf
    std::vector<sg_node> _sg_vec;

    // Thread blocked in add_buf_wait, if any
    sched::thread_handle _waiter;

    u16 avail_head() const {return _avail_head;};

private:
    // Up pointer
    virtio_driver* _driver;
    // Index of this queue within the device
    u16 _q_index;
    // Virtual address of the vring memory; its physical address is
    // what gets handed to the virtio device
    void* _vring_ptr;
    // Total number of descriptors in ring
    unsigned int _num;
    // Position of the next available descriptor
    u16 _avail_head;
    // Position of the used descriptor we've last seen
    // from the host used ring
    u16 _used_ring_host_head;
    // Position of the used descriptor we've last seen
    // used internally for get-add bufs sync
    u16 _used_ring_guest_head;
    // The amount of avail descriptors we've added since last kick
    u16 _avail_added_since_kick;
    // Number of descriptors currently free for allocation
    u16 _avail_count;
    // Flat list of chained descriptors
    vring_desc* _desc;
    // Available ring: written by guest, consumed by host
    vring_avail* _avail;
    // Used ring: written by host, consumed by guest
    vring_used* _used;
    // cookies to store access to the upper layer pointers
    void** _cookie;
    //protects parallel get_buf/add_buf access, mainly the _avail_head variable
    mutex _lock;
    // pointer to the end of the used ring to get a glimpse of the host avail idx
    std::atomic<u16>* _avail_event;
    // pointer past the end of the avail ring, where our used_event
    // index (read by the host for interrupt coalescing) lives
    std::atomic<u16>* _used_event;
    // A flag set by driver to turn on/off indirect descriptor
    bool _use_indirect;
};
}
#endif // VIRTIO_VRING_H