Skip to content

Commit f7f35c0

Browse files
committed
Merge branch 'rds-next'
Sowmini Varadhan says:

====================
net/rds: SOL_RDS socket option to explicitly select transport

Today the underlying transport (TCP or IB) for a PF_RDS socket is implicitly selected based on the local address used to bind(2) the PF_RDS socket. This results in some non-deterministic behavior when there are un-numbered and IPoIB interfaces sharing the same IP address. It also places the constraint that the IB interface must have an IP address (and thus, IPoIB) configured on it.

The non-determinism may be avoided by providing the user-space application a socket option that allows it to explicitly select the transport prior to bind(2).

Patch 1 of this series provides the constant definitions needed by the application via <linux/rds.h>. Patch 2 provides the setsockopt support, and Patch 3 provides the getsockopt support.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents f16e9d8 + 8ba3846 commit f7f35c0

File tree

5 files changed

+77
-5
lines changed

5 files changed

+77
-5
lines changed

include/uapi/linux/rds.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838

3939
#define RDS_IB_ABI_VERSION 0x301
4040

41+
#define SOL_RDS 276
42+
4143
/*
4244
* setsockopt/getsockopt for SOL_RDS
4345
*/
@@ -48,6 +50,14 @@
4850
#define RDS_RECVERR 5
4951
#define RDS_CONG_MONITOR 6
5052
#define RDS_GET_MR_FOR_DEST 7
53+
#define SO_RDS_TRANSPORT 8
54+
55+
/* supported values for SO_RDS_TRANSPORT */
56+
#define RDS_TRANS_IB 0
57+
#define RDS_TRANS_IWARP 1
58+
#define RDS_TRANS_TCP 2
59+
#define RDS_TRANS_COUNT 3
60+
#define RDS_TRANS_NONE (~0)
5161

5262
/*
5363
* Control message types for SOL_RDS.

net/rds/af_rds.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
270270
return ret;
271271
}
272272

273+
/*
 * SO_RDS_TRANSPORT setsockopt handler: explicitly attach a transport to
 * the socket from a user-supplied RDS_TRANS_* value.
 *
 * @rs:     socket whose rs_transport is to be set
 * @optval: user pointer to an int holding the requested transport type
 * @optlen: length of the user buffer; must be exactly sizeof(int)
 *
 * Returns 0 on success, or:
 *   -EOPNOTSUPP   the socket already has a transport attached
 *   -EINVAL       optlen is wrong or the type is outside
 *                 [0, RDS_TRANS_COUNT)
 *   -EFAULT       the option value could not be copied from user space
 *   -ENOPROTOOPT  rds_trans_get() returned no transport for that type
 *
 * The visible caller, rds_setsockopt(), invokes this between
 * lock_sock() and release_sock() on the socket.
 */
static int rds_set_transport(struct rds_sock *rs, char __user *optval,
			     int optlen)
{
	int requested;

	/* previously attached to transport */
	if (rs->rs_transport != NULL)
		return -EOPNOTSUPP;

	if (optlen != sizeof(int))
		return -EINVAL;

	if (copy_from_user(&requested, (int __user *)optval,
			   sizeof(requested)) != 0)
		return -EFAULT;

	if (requested < 0 || requested >= RDS_TRANS_COUNT)
		return -EINVAL;

	rs->rs_transport = rds_trans_get(requested);
	if (!rs->rs_transport)
		return -ENOPROTOOPT;

	return 0;
}
294+
273295
static int rds_setsockopt(struct socket *sock, int level, int optname,
274296
char __user *optval, unsigned int optlen)
275297
{
@@ -300,6 +322,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
300322
case RDS_CONG_MONITOR:
301323
ret = rds_cong_monitor(rs, optval, optlen);
302324
break;
325+
case SO_RDS_TRANSPORT:
326+
lock_sock(sock->sk);
327+
ret = rds_set_transport(rs, optval, optlen);
328+
release_sock(sock->sk);
329+
break;
303330
default:
304331
ret = -ENOPROTOOPT;
305332
}
@@ -312,6 +339,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
312339
{
313340
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
314341
int ret = -ENOPROTOOPT, len;
342+
int trans;
315343

316344
if (level != SOL_RDS)
317345
goto out;
@@ -337,6 +365,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
337365
else
338366
ret = 0;
339367
break;
368+
case SO_RDS_TRANSPORT:
369+
if (len < sizeof(int)) {
370+
ret = -EINVAL;
371+
break;
372+
}
373+
trans = (rs->rs_transport ? rs->rs_transport->t_type :
374+
RDS_TRANS_NONE); /* unbound */
375+
if (put_user(trans, (int __user *)optval) ||
376+
put_user(sizeof(int), optlen))
377+
ret = -EFAULT;
378+
else
379+
ret = 0;
380+
break;
340381
default:
341382
break;
342383
}

net/rds/bind.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
181181
if (ret)
182182
goto out;
183183

184+
if (rs->rs_transport) { /* previously bound */
185+
ret = 0;
186+
goto out;
187+
}
184188
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
185189
if (!trans) {
186190
ret = -EADDRNOTAVAIL;

net/rds/rds.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -408,11 +408,6 @@ struct rds_notifier {
408408
* should try hard not to block.
409409
*/
410410

411-
#define RDS_TRANS_IB 0
412-
#define RDS_TRANS_IWARP 1
413-
#define RDS_TRANS_TCP 2
414-
#define RDS_TRANS_COUNT 3
415-
416411
struct rds_transport {
417412
char t_name[TRANSNAMSIZ];
418413
struct list_head t_item;
@@ -803,6 +798,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
803798
void rds_trans_put(struct rds_transport *trans);
804799
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
805800
unsigned int avail);
801+
struct rds_transport *rds_trans_get(int t_type);
806802
int rds_trans_init(void);
807803
void rds_trans_exit(void);
808804

net/rds/transport.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
101101
return ret;
102102
}
103103

104+
struct rds_transport *rds_trans_get(int t_type)
105+
{
106+
struct rds_transport *ret = NULL;
107+
struct rds_transport *trans;
108+
unsigned int i;
109+
110+
down_read(&rds_trans_sem);
111+
for (i = 0; i < RDS_TRANS_COUNT; i++) {
112+
trans = transports[i];
113+
114+
if (trans && trans->t_type == t_type &&
115+
(!trans->t_owner || try_module_get(trans->t_owner))) {
116+
ret = trans;
117+
break;
118+
}
119+
}
120+
up_read(&rds_trans_sem);
121+
122+
return ret;
123+
}
124+
104125
/*
105126
* This returns the number of stats entries in the snapshot and only
106127
* copies them using the iter if there is enough space for them. The

0 commit comments

Comments
 (0)