2929
3030#define MAX_KEY_LEN 100
3131
32+ /*
33+ * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
34+ * blkcg_pol_register_mutex nests outside of it and synchronizes entire
35+ * policy [un]register operations, including cgroup file additions and
36+ * removals. Keeping cgroup file registration outside blkcg_pol_mutex
37+ * allows blkcg_pol_mutex itself to be grabbed from cgroup callbacks.
38+ */
39+ static DEFINE_MUTEX (blkcg_pol_register_mutex );
3240static DEFINE_MUTEX (blkcg_pol_mutex );
3341
3442struct blkcg blkcg_root ;
@@ -38,6 +46,8 @@ struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
3846
3947static struct blkcg_policy * blkcg_policy [BLKCG_MAX_POLS ];
4048
49+ static LIST_HEAD (all_blkcgs ); /* protected by blkcg_pol_mutex */
50+
4151static bool blkcg_policy_enabled (struct request_queue * q ,
4252 const struct blkcg_policy * pol )
4353{
@@ -453,20 +463,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
453463 struct blkcg_gq * blkg ;
454464 int i ;
455465
456- /*
457- * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
458- * which ends up putting cgroup's internal cgroup_tree_mutex under
459- * it; however, cgroup_tree_mutex is nested above cgroup file
460- * active protection and grabbing blkcg_pol_mutex from a cgroup
461- * file operation creates a possible circular dependency. cgroup
462- * internal locking is planned to go through further simplification
463- * and this issue should go away soon. For now, let's trylock
464- * blkcg_pol_mutex and restart the write on failure.
465- *
466- * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
467- */
468- if (!mutex_trylock (& blkcg_pol_mutex ))
469- return restart_syscall ();
466+ mutex_lock (& blkcg_pol_mutex );
470467 spin_lock_irq (& blkcg -> lock );
471468
472469 /*
@@ -822,8 +819,17 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
822819{
823820 struct blkcg * blkcg = css_to_blkcg (css );
824821
825- if (blkcg != & blkcg_root )
822+ mutex_lock (& blkcg_pol_mutex );
823+ list_del (& blkcg -> all_blkcgs_node );
824+ mutex_unlock (& blkcg_pol_mutex );
825+
826+ if (blkcg != & blkcg_root ) {
827+ int i ;
828+
829+ for (i = 0 ; i < BLKCG_MAX_POLS ; i ++ )
830+ kfree (blkcg -> pd [i ]);
826831 kfree (blkcg );
832+ }
827833}
828834
829835static struct cgroup_subsys_state *
@@ -833,6 +839,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
833839 struct cgroup_subsys_state * ret ;
834840 int i ;
835841
842+ mutex_lock (& blkcg_pol_mutex );
843+
836844 if (!parent_css ) {
837845 blkcg = & blkcg_root ;
838846 goto done ;
@@ -875,14 +883,17 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
875883#ifdef CONFIG_CGROUP_WRITEBACK
876884 INIT_LIST_HEAD (& blkcg -> cgwb_list );
877885#endif
886+ list_add_tail (& blkcg -> all_blkcgs_node , & all_blkcgs );
887+
888+ mutex_unlock (& blkcg_pol_mutex );
878889 return & blkcg -> css ;
879890
880891free_pd_blkcg :
881892 for (i -- ; i >= 0 ; i -- )
882893 kfree (blkcg -> pd [i ]);
883-
884894free_blkcg :
885895 kfree (blkcg );
896+ mutex_unlock (& blkcg_pol_mutex );
886897 return ret ;
887898}
888899
@@ -1037,10 +1048,8 @@ int blkcg_activate_policy(struct request_queue *q,
10371048 const struct blkcg_policy * pol )
10381049{
10391050 LIST_HEAD (pds );
1040- LIST_HEAD (cpds );
10411051 struct blkcg_gq * blkg ;
10421052 struct blkg_policy_data * pd , * nd ;
1043- struct blkcg_policy_data * cpd , * cnd ;
10441053 int cnt = 0 , ret ;
10451054
10461055 if (blkcg_policy_enabled (q , pol ))
@@ -1053,26 +1062,14 @@ int blkcg_activate_policy(struct request_queue *q,
10531062 cnt ++ ;
10541063 spin_unlock_irq (q -> queue_lock );
10551064
1056- /*
1057- * Allocate per-blkg and per-blkcg policy data
1058- * for all existing blkgs.
1059- */
1065+ /* allocate per-blkg policy data for all existing blkgs */
10601066 while (cnt -- ) {
10611067 pd = kzalloc_node (pol -> pd_size , GFP_KERNEL , q -> node );
10621068 if (!pd ) {
10631069 ret = - ENOMEM ;
10641070 goto out_free ;
10651071 }
10661072 list_add_tail (& pd -> alloc_node , & pds );
1067-
1068- if (!pol -> cpd_size )
1069- continue ;
1070- cpd = kzalloc_node (pol -> cpd_size , GFP_KERNEL , q -> node );
1071- if (!cpd ) {
1072- ret = - ENOMEM ;
1073- goto out_free ;
1074- }
1075- list_add_tail (& cpd -> alloc_node , & cpds );
10761073 }
10771074
10781075 /*
@@ -1082,32 +1079,17 @@ int blkcg_activate_policy(struct request_queue *q,
10821079 spin_lock_irq (q -> queue_lock );
10831080
10841081 list_for_each_entry (blkg , & q -> blkg_list , q_node ) {
1085- if (WARN_ON (list_empty (& pds )) ||
1086- WARN_ON (pol -> cpd_size && list_empty (& cpds ))) {
1082+ if (WARN_ON (list_empty (& pds ))) {
10871083 /* umm... this shouldn't happen, just abort */
10881084 ret = - ENOMEM ;
10891085 goto out_unlock ;
10901086 }
1091- cpd = list_first_entry (& cpds , struct blkcg_policy_data ,
1092- alloc_node );
1093- list_del_init (& cpd -> alloc_node );
10941087 pd = list_first_entry (& pds , struct blkg_policy_data , alloc_node );
10951088 list_del_init (& pd -> alloc_node );
10961089
10971090 /* grab blkcg lock too while installing @pd on @blkg */
10981091 spin_lock (& blkg -> blkcg -> lock );
10991092
1100- if (!pol -> cpd_size )
1101- goto no_cpd ;
1102- if (!blkg -> blkcg -> pd [pol -> plid ]) {
1103- /* Per-policy per-blkcg data */
1104- blkg -> blkcg -> pd [pol -> plid ] = cpd ;
1105- cpd -> plid = pol -> plid ;
1106- pol -> cpd_init_fn (blkg -> blkcg );
1107- } else { /* must free it as it has already been extracted */
1108- kfree (cpd );
1109- }
1110- no_cpd :
11111093 blkg -> pd [pol -> plid ] = pd ;
11121094 pd -> blkg = blkg ;
11131095 pd -> plid = pol -> plid ;
@@ -1124,8 +1106,6 @@ int blkcg_activate_policy(struct request_queue *q,
11241106 blk_queue_bypass_end (q );
11251107 list_for_each_entry_safe (pd , nd , & pds , alloc_node )
11261108 kfree (pd );
1127- list_for_each_entry_safe (cpd , cnd , & cpds , alloc_node )
1128- kfree (cpd );
11291109 return ret ;
11301110}
11311111EXPORT_SYMBOL_GPL (blkcg_activate_policy );
@@ -1162,8 +1142,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
11621142
11631143 kfree (blkg -> pd [pol -> plid ]);
11641144 blkg -> pd [pol -> plid ] = NULL ;
1165- kfree (blkg -> blkcg -> pd [pol -> plid ]);
1166- blkg -> blkcg -> pd [pol -> plid ] = NULL ;
11671145
11681146 spin_unlock (& blkg -> blkcg -> lock );
11691147 }
@@ -1182,11 +1160,13 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
11821160 */
11831161int blkcg_policy_register (struct blkcg_policy * pol )
11841162{
1163+ struct blkcg * blkcg ;
11851164 int i , ret ;
11861165
11871166 if (WARN_ON (pol -> pd_size < sizeof (struct blkg_policy_data )))
11881167 return - EINVAL ;
11891168
1169+ mutex_lock (& blkcg_pol_register_mutex );
11901170 mutex_lock (& blkcg_pol_mutex );
11911171
11921172 /* find an empty slot */
@@ -1195,19 +1175,49 @@ int blkcg_policy_register(struct blkcg_policy *pol)
11951175 if (!blkcg_policy [i ])
11961176 break ;
11971177 if (i >= BLKCG_MAX_POLS )
1198- goto out_unlock ;
1178+ goto err_unlock ;
11991179
1200- /* register and update blkgs */
1180+ /* register @pol */
12011181 pol -> plid = i ;
1202- blkcg_policy [i ] = pol ;
1182+ blkcg_policy [pol -> plid ] = pol ;
1183+
1184+ /* allocate and install cpd's */
1185+ if (pol -> cpd_size ) {
1186+ list_for_each_entry (blkcg , & all_blkcgs , all_blkcgs_node ) {
1187+ struct blkcg_policy_data * cpd ;
1188+
1189+ cpd = kzalloc (pol -> cpd_size , GFP_KERNEL );
1190+ if (!cpd ) {
1191+ mutex_unlock (& blkcg_pol_mutex );
1192+ goto err_free_cpds ;
1193+ }
1194+
1195+ blkcg -> pd [pol -> plid ] = cpd ;
1196+ cpd -> plid = pol -> plid ;
1197+ pol -> cpd_init_fn (blkcg );
1198+ }
1199+ }
1200+
1201+ mutex_unlock (& blkcg_pol_mutex );
12031202
12041203 /* everything is in place, add intf files for the new policy */
12051204 if (pol -> cftypes )
12061205 WARN_ON (cgroup_add_legacy_cftypes (& blkio_cgrp_subsys ,
12071206 pol -> cftypes ));
1208- ret = 0 ;
1209- out_unlock :
1207+ mutex_unlock (& blkcg_pol_register_mutex );
1208+ return 0 ;
1209+
1210+ err_free_cpds :
1211+ if (pol -> cpd_size ) {
1212+ list_for_each_entry (blkcg , & all_blkcgs , all_blkcgs_node ) {
1213+ kfree (blkcg -> pd [pol -> plid ]);
1214+ blkcg -> pd [pol -> plid ] = NULL ;
1215+ }
1216+ }
1217+ blkcg_policy [pol -> plid ] = NULL ;
1218+ err_unlock :
12101219 mutex_unlock (& blkcg_pol_mutex );
1220+ mutex_unlock (& blkcg_pol_register_mutex );
12111221 return ret ;
12121222}
12131223EXPORT_SYMBOL_GPL (blkcg_policy_register );
@@ -1220,7 +1230,9 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
12201230 */
12211231void blkcg_policy_unregister (struct blkcg_policy * pol )
12221232{
1223- mutex_lock (& blkcg_pol_mutex );
1233+ struct blkcg * blkcg ;
1234+
1235+ mutex_lock (& blkcg_pol_register_mutex );
12241236
12251237 if (WARN_ON (blkcg_policy [pol -> plid ] != pol ))
12261238 goto out_unlock ;
@@ -1229,9 +1241,19 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
12291241 if (pol -> cftypes )
12301242 cgroup_rm_cftypes (pol -> cftypes );
12311243
1232- /* unregister and update blkgs */
1244+ /* remove cpds and unregister */
1245+ mutex_lock (& blkcg_pol_mutex );
1246+
1247+ if (pol -> cpd_size ) {
1248+ list_for_each_entry (blkcg , & all_blkcgs , all_blkcgs_node ) {
1249+ kfree (blkcg -> pd [pol -> plid ]);
1250+ blkcg -> pd [pol -> plid ] = NULL ;
1251+ }
1252+ }
12331253 blkcg_policy [pol -> plid ] = NULL ;
1234- out_unlock :
1254+
12351255 mutex_unlock (& blkcg_pol_mutex );
1256+ out_unlock :
1257+ mutex_unlock (& blkcg_pol_register_mutex );
12361258}
12371259EXPORT_SYMBOL_GPL (blkcg_policy_unregister );
0 commit comments