-
Notifications
You must be signed in to change notification settings - Fork 9
/
RPCv3Transceiver.v
483 lines (380 loc) · 15.3 KB
/
RPCv3Transceiver.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
`default_nettype none
`timescale 1ns / 1ps
/***********************************************************************************************************************
* *
* ANTIKERNEL v0.1 *
* *
* Copyright (c) 2012-2016 Andrew D. Zonenberg *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
* following conditions are met: *
* *
* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
* following disclaimer. *
* *
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
* following disclaimer in the documentation and/or other materials provided with the distribution. *
* *
* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
* derived from this software without specific prior written permission. *
* *
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
* POSSIBILITY OF SUCH DAMAGE. *
* *
***********************************************************************************************************************/
/**
@file
@author Andrew D. Zonenberg
@brief Transceiver for RPC network, protocol version 3
Messages are fixed size 128-bit datagrams.
Flow control:
Transmitter waits for rpc_rx_ready to be asserted
Transmitter asserts rpc_tx_en and sends the first DATA_WIDTH bits of the message
Transmitter sends remaining data, if any, on consecutive clock edges.
No inter-frame gap is required, the next message may be sent immediately if rpc_rx_ready is still asserted.
The packet consists of eight 16-bit words. When DATA_WIDTH=16 the message is sent one word per clock.
Larger DATA_WIDTH values send messages two, four, or eight words at a time.
Word 0: tx_dst_addr
Word 1: NODE_ADDR (automatically added)
Word 2: {tx_callnum, tx_type, tx_d0[20:16]}
Word 3: tx_d0[15:0]
Word 4: tx_d1[31:16]
Word 5: tx_d1[15:0]
Word 6: tx_d2[31:16]
Word 7: tx_d2[15:0]
To transmit a message:
Load rpc_fab_tx_* with the message to be sent
Assert rpc_fab_tx_en for one cycle
rpc_fab_tx_done goes high combinatorially for one cycle as the last word of the message is sent.
Do not change rpc_fab_tx_* until rpc_fab_tx_done goes low.
rpc_fab_tx_busy is high during the entire period of a send, including the "wait for rx_ready" phase.
Note that if DATA_WIDTH=128 and rpc_tx_ready is set, rpc_fab_tx_done will be asserted combinatorially.
To receive a message:
Assert rpc_fab_rx_ready for one cycle
Wait for rpc_fab_rx_en to go high and process rpc_fab_rx_*
If still ready to receive another message, keep rpc_fab_rx_ready high.
Note that two messages can arrive on consecutive clocks (before rpc_fab_rx_ready can be deasserted)
if DATA_WIDTH=128, so any node receiving messages with a 128-bit bus must have a 2+ message receive FIFO.
RESOURCE USAGE (XST S6 post synthesis estimate, TX side)
TODO update
Quiet
Width FF LUT
16 7 57
32 6 54
64 4 68
128 2 115
Noisy
Width FF LUT
16 9 54
32 6 38
64 3 68
128 1 3
RPCv2 (for comparison)
32 11 73
RESOURCE USAGE (XST S6 post synthesis estimate, RX side)
RPCv3
Width FF LUT
16 126 117
32 125 116
64 118 48
128 BROKEN!!!
RPCv2 (for comparison)
32 143 135
*/
module RPCv3Transceiver
#(
	//Data width (must be one of 16, 32, 64, 128).
	parameter DATA_WIDTH = 32,

	//When zero, queued data is allowed to show up on rpc_tx_data in between packets.
	//This can cause extra switching power consumption and clutters LA traces, but uses less LUTs.
	//When nonzero, force rpc_tx_data to zero when not transmitting.
	parameter QUIET_WHEN_IDLE = 1,

	//This transceiver is always a leaf (node) port, no exceptions.
	//We always send from this address (it is inserted as word 1 of every outbound message).
	parameter NODE_ADDR = 16'h8000
) (
	//Clock for this link
	input wire clk,

	//Network interface, outbound side
	output reg rpc_tx_en = 0,					//high during the first DATA_WIDTH bits of a message
	output reg[DATA_WIDTH-1:0] rpc_tx_data = 0,
	input wire rpc_tx_ready,					//far end is able to accept a new message

	//Network interface, inbound side
	input wire rpc_rx_en,						//far end is presenting the first DATA_WIDTH bits of a message
	input wire[DATA_WIDTH-1:0] rpc_rx_data,
	output reg rpc_rx_ready = 0,				//we are able to accept a new message

	//Fabric interface, outbound side
	input wire rpc_fab_tx_en,
	output reg rpc_fab_tx_busy = 0,
	input wire[15:0] rpc_fab_tx_dst_addr,
	input wire[7:0] rpc_fab_tx_callnum,
	input wire[2:0] rpc_fab_tx_type,
	input wire[20:0] rpc_fab_tx_d0,
	input wire[31:0] rpc_fab_tx_d1,
	input wire[31:0] rpc_fab_tx_d2,
	output reg rpc_fab_tx_done = 0,

	//Fabric interface, inbound side
	input wire rpc_fab_rx_ready,
	output reg rpc_fab_rx_busy = 0,
	output reg rpc_fab_rx_en = 0,
	output reg[15:0] rpc_fab_rx_src_addr = 0,
	output reg[7:0] rpc_fab_rx_callnum = 0,
	output reg[2:0] rpc_fab_rx_type = 0,
	output reg[20:0] rpc_fab_rx_d0 = 0,
	output reg[31:0] rpc_fab_rx_d1 = 0,
	output reg[31:0] rpc_fab_rx_d2 = 0
);

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// Synthesis-time sanity checking

	initial begin
		case(DATA_WIDTH)
			16: begin
			end
			32: begin
			end
			64: begin
			end
			128: begin
			end
			//Anything else is unsupported: complain and stop
			default: begin
				$display("ERROR: RPCv3Transceiver DATA_WIDTH must be 16/32/64/128");
				$finish;
			end
		endcase
	end

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// Compute some useful values

	//Number of clocks it takes to send a message
	localparam MESSAGE_CYCLES = 128 / DATA_WIDTH;

	//Number of bits we need in the cycle counter
	`include "../../synth_helpers/clog2.vh"
	localparam CYCLE_BITS = clog2(MESSAGE_CYCLES);

	//MSB index of the cycle counters. Clamped to zero so the counters are still legal
	//(one bit wide) when CYCLE_BITS is zero.
	localparam CYCLE_MAX = CYCLE_BITS ? CYCLE_BITS-1 : 0;

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// Transmit path

	//Words of data to send
	wire[15:0] tx_d0 = rpc_fab_tx_dst_addr;
	wire[15:0] tx_d1 = NODE_ADDR;
	wire[15:0] tx_d2 = {rpc_fab_tx_callnum, rpc_fab_tx_type, rpc_fab_tx_d0[20:16]};
	wire[15:0] tx_d3 = rpc_fab_tx_d0[15:0];
	wire[15:0] tx_d4 = rpc_fab_tx_d1[31:16];
	wire[15:0] tx_d5 = rpc_fab_tx_d1[15:0];
	wire[15:0] tx_d6 = rpc_fab_tx_d2[31:16];
	wire[15:0] tx_d7 = rpc_fab_tx_d2[15:0];

	//If we couldn't send the first cycle, remember that we have a send queued up
	reg tx_pending = 0;

	//True if we want to send (either current or queued transmit request)
	wire tx_request = tx_pending || rpc_fab_tx_en;

	//True if we're starting a transmit this cycle (whether queued or fresh)
	wire tx_starting = tx_request && rpc_tx_ready;

	//Position within the message (in DATA_WIDTH-bit units)
	reg[CYCLE_MAX:0] tx_count = 0;

	//True if a transmit is in progress
	wire tx_active = (tx_count != 0) || tx_starting;

	generate

		//128-bit datapath is stupidly simple, special-case it
		if(DATA_WIDTH == 128) begin

			//All transmit logic is combinatorial.
			//Blocking assignments are used since this block models pure combinatorial logic.
			always @(*) begin

				//We're only busy if we want to send, but can't yet
				rpc_fab_tx_busy = (tx_pending || (tx_count != 0) ) && !rpc_tx_ready;

				//Send the message if we're starting to send
				rpc_tx_en = tx_starting;

				//We finish sending the same cycle we send
				rpc_fab_tx_done = tx_starting;

				//Optionally squash output when not sending
				if(QUIET_WHEN_IDLE && !tx_active)
					rpc_tx_data = {DATA_WIDTH{1'b0}};

				//Nope, send it (whole message in a single beat, word 0 in the most significant bits)
				else
					rpc_tx_data = {tx_d0, tx_d1, tx_d2, tx_d3, tx_d4, tx_d5, tx_d6, tx_d7};

			end

			//One little bit of stateful logic, though :)
			always @(posedge clk) begin

				//Clear pending messages once sent
				if(tx_starting)
					tx_pending <= 0;

				//If we try to send when rx isn't ready, save it until they are.
				//Listed last so that it wins if both conditions are true.
				if(rpc_fab_tx_en && !rpc_tx_ready)
					tx_pending <= 1;

			end

		end

		//All other datapaths take >1 cycle to send
		else begin

			//Combinatorial transmit.
			//Blocking assignments are used since this block models pure combinatorial logic.
			always @(*) begin

				//We're busy if we want to send, or are sending.
				rpc_fab_tx_busy = (tx_pending || (tx_count != 0) );

				//Flag the first beat of the message only
				rpc_tx_en = tx_starting;

				//Optionally squash output when not sending
				if(QUIET_WHEN_IDLE && !tx_active)
					rpc_tx_data = {DATA_WIDTH{1'b0}};

				//Nope, send it: pick the slice of the message selected by tx_count
				else if(DATA_WIDTH == 64) begin
					case(tx_count)
						1: rpc_tx_data = { tx_d4, tx_d5, tx_d6, tx_d7 };
						default: rpc_tx_data = { tx_d0, tx_d1, tx_d2, tx_d3 };
					endcase
				end

				else if(DATA_WIDTH == 32) begin
					case(tx_count)
						3: rpc_tx_data = { tx_d6, tx_d7 };
						2: rpc_tx_data = { tx_d4, tx_d5 };
						1: rpc_tx_data = { tx_d2, tx_d3 };
						default: rpc_tx_data = { tx_d0, tx_d1 };
					endcase
				end

				else begin
					case(tx_count)
						7: rpc_tx_data = tx_d7;
						6: rpc_tx_data = tx_d6;
						5: rpc_tx_data = tx_d5;
						4: rpc_tx_data = tx_d4;
						3: rpc_tx_data = tx_d3;
						2: rpc_tx_data = tx_d2;
						1: rpc_tx_data = tx_d1;
						default: rpc_tx_data = tx_d0;
					endcase
				end

				//Set done flag at end of message (counter at its maximum value = last beat)
				rpc_fab_tx_done = (tx_count == {CYCLE_BITS{1'b1}});

			end

			//More state logic needed to keep track of our phase etc
			always @(posedge clk) begin

				//Clear pending messages once sent
				if(tx_active)
					tx_pending <= 0;

				//If we try to send when rx isn't ready, save it until they are.
				//Listed last so that it wins if both conditions are true.
				if(rpc_fab_tx_en && !rpc_tx_ready)
					tx_pending <= 1;

				//Increment word counter. It wraps back to zero after the last beat,
				//which is what ends the transmit.
				if(tx_active)
					tx_count <= tx_count + 1'h1;

			end

		end

	endgenerate

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// Receive path

	//True if we are in the first cycle of an incoming message
	wire rx_starting = (rpc_rx_en && rpc_rx_ready);

	//Position within the message (in DATA_WIDTH-bit units)
	reg[CYCLE_MAX:0] rx_count = 0;

	//True if a receive is in progress
	wire rx_active = (rx_count != 0) || rx_starting;

	generate

		//Set busy flag if we're doing stuff (mostly used by formal stuff)
		always @(*) begin
			rpc_fab_rx_busy = (rx_count != 0);
		end

		always @(posedge clk) begin

			//rpc_fab_rx_en is a single-cycle strobe: default low, overridden below on the last beat
			rpc_fab_rx_en <= 0;

			//Process incoming data words
			if(rx_active) begin

				//Whole message is on the wire right now
				if(DATA_WIDTH == 128) begin

					//127:112 are destination address (word 0), ignore
					rpc_fab_rx_src_addr <= rpc_rx_data[111:96];
					rpc_fab_rx_callnum <= rpc_rx_data[95:88];
					rpc_fab_rx_type <= rpc_rx_data[87:85];
					rpc_fab_rx_d0 <= rpc_rx_data[84:64];
					rpc_fab_rx_d1 <= rpc_rx_data[63:32];
					rpc_fab_rx_d2 <= rpc_rx_data[31:0];

					//end of message
					rpc_fab_rx_en <= 1;

				end

				else begin

					//Update word count as we move through the message
					if(rx_starting)
						rx_count <= 1;
					else
						rx_count <= rx_count + 1'h1;

					//Grab whatever fields are currently on the wire
					case(rx_count)

						0: begin
							case(DATA_WIDTH)
								16: begin
									//dest address, ignore
								end

								//upper half is dest address, ignore
								32: rpc_fab_rx_src_addr <= rpc_rx_data[15:0];

								//63:48 are dest address, ignore
								64: begin
									rpc_fab_rx_src_addr <= rpc_rx_data[47:32];
									rpc_fab_rx_callnum <= rpc_rx_data[31:24];
									rpc_fab_rx_type <= rpc_rx_data[23:21];
									rpc_fab_rx_d0 <= rpc_rx_data[20:0];
								end
							endcase
						end

						1: begin
							case(DATA_WIDTH)
								16: begin
									rpc_fab_rx_src_addr <= rpc_rx_data;
								end

								32: begin
									rpc_fab_rx_callnum <= rpc_rx_data[31:24];
									rpc_fab_rx_type <= rpc_rx_data[23:21];
									rpc_fab_rx_d0 <= rpc_rx_data[20:0];
								end

								64: begin
									rpc_fab_rx_d1 <= rpc_rx_data[63:32];
									rpc_fab_rx_d2 <= rpc_rx_data[31:0];

									//end of message
									rpc_fab_rx_en <= 1;
								end
							endcase
						end

						2: begin
							if(DATA_WIDTH == 32)
								rpc_fab_rx_d1 <= rpc_rx_data;
							else begin
								rpc_fab_rx_callnum <= rpc_rx_data[15:8];
								rpc_fab_rx_type <= rpc_rx_data[7:5];
								rpc_fab_rx_d0[20:16] <= rpc_rx_data[4:0];
							end
						end

						3: begin
							if(DATA_WIDTH == 32) begin
								rpc_fab_rx_d2 <= rpc_rx_data;

								//end of message
								rpc_fab_rx_en <= 1;
							end
							else
								rpc_fab_rx_d0[15:0] <= rpc_rx_data;
						end

						//If we get here we're a 16-bit message, so no more conditionals needed
						4: rpc_fab_rx_d1[31:16] <= rpc_rx_data;
						5: rpc_fab_rx_d1[15:0] <= rpc_rx_data;
						6: rpc_fab_rx_d2[31:16] <= rpc_rx_data;
						7: begin
							rpc_fab_rx_d2[15:0] <= rpc_rx_data;

							//end of message
							rpc_fab_rx_en <= 1;
						end

					endcase

				end

			end

		end

		//Ready to receive if the fabric side is ready.
		//Once we go ready, go un-ready when a message comes in unless we keep ready high.
		//The "set" is listed last so it wins when a message arrives while the fabric is still ready.
		reg rpc_rx_ready_ff = 0;
		always @(posedge clk) begin
			if(rpc_rx_en)
				rpc_rx_ready_ff <= 0;
			if(rpc_fab_rx_ready)
				rpc_rx_ready_ff <= 1;
		end

		//Ready is the latched state, plus a combinatorial bypass so a fresh rpc_fab_rx_ready
		//is visible to the network in the same cycle.
		//
		//This must NOT be a function of rpc_rx_en:
		// * rx_starting is (rpc_rx_en && rpc_rx_ready), so dropping ready in the cycle rpc_rx_en is high
		//   would make us ignore any message that arrives after a one-cycle rpc_fab_rx_ready pulse.
		// * A transmitter like the one in this module derives its rpc_tx_en combinatorially from
		//   rpc_tx_ready, so ready depending on en would close a combinatorial loop across the link.
		//The latch above still takes ready away on the clock edge after the message starts.
		always @(*) begin
			rpc_rx_ready = rpc_rx_ready_ff || rpc_fab_rx_ready;
		end

	endgenerate

endmodule