Skip to content

Commit b42990d

Browse files
broonie authored and ctmarinas committed
arm64/sme: Identify supported SME vector lengths at boot
The vector lengths used for SME are controlled through a similar set of registers to those for SVE and enumerated using a similar algorithm, with some slight differences due to the fact that, unlike SVE, there are no restrictions on which combinations of vector lengths can be supported nor any mandatory vector lengths which must be implemented. Add a new vector type and implement support for enumerating it.

One slightly awkward feature is that we need to read the current vector length using a different instruction (or enter streaming mode, which would have the same issue and be higher cost). Rather than add an ops structure we add special cases directly in the otherwise generic vec_probe_vqs() function; this is a bit inelegant but it's the only place where this is an issue.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-10-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
1 parent 5e64b86 commit b42990d

File tree

8 files changed

+218
-2
lines changed

8 files changed

+218
-2
lines changed

arch/arm64/include/asm/cpu.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct cpuinfo_arm64 {
6464

6565
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
6666
u64 reg_zcr;
67+
68+
/* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
69+
u64 reg_smcr;
6770
};
6871

6972
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);

arch/arm64/include/asm/cpufeature.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
622622
return val > 0;
623623
}
624624

625+
/*
 * Test an ID_AA64PFR1 register value for SME support.
 *
 * Extracts the unsigned field at ID_AA64PFR1_SME_SHIFT from @pfr1 and
 * returns true for any non-zero value.
 */
static inline bool id_aa64pfr1_sme(u64 pfr1)
626+
{
627+
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
628+
629+
return val > 0;
630+
}
631+
625632
static inline bool id_aa64pfr1_mte(u64 pfr1)
626633
{
627634
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);

arch/arm64/include/asm/fpsimd.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
7878
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
7979

8080
extern u64 read_zcr_features(void);
81+
extern u64 read_smcr_features(void);
8182

8283
/*
8384
* Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -172,6 +173,12 @@ static inline void write_vl(enum vec_type type, u64 val)
172173
tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
173174
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
174175
break;
176+
#endif
177+
#ifdef CONFIG_ARM64_SME
178+
case ARM64_VEC_SME:
179+
tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
180+
write_sysreg_s(tmp | val, SYS_SMCR_EL1);
181+
break;
175182
#endif
176183
default:
177184
WARN_ON_ONCE(1);
@@ -268,12 +275,31 @@ static inline void sme_smstop(void)
268275
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
269276
}
270277

278+
extern void __init sme_setup(void);
279+
280+
/* Convenience wrapper: vec_max_vl() for the SME vector type. */
static inline int sme_max_vl(void)
281+
{
282+
return vec_max_vl(ARM64_VEC_SME);
283+
}
284+
285+
/* Convenience wrapper: vec_max_virtualisable_vl() for the SME vector type. */
static inline int sme_max_virtualisable_vl(void)
286+
{
287+
return vec_max_virtualisable_vl(ARM64_VEC_SME);
288+
}
289+
290+
extern unsigned int sme_get_vl(void);
291+
271292
#else
272293

273294
static inline void sme_smstart_sm(void) { }
274295
static inline void sme_smstop_sm(void) { }
275296
static inline void sme_smstop(void) { }
276297

298+
/*
 * Stubs for the #else branch (closed below by "#endif ! CONFIG_ARM64_SME"):
 * setup does nothing and every vector length query reports 0.
 */
static inline void sme_setup(void) { }
299+
static inline unsigned int sme_get_vl(void) { return 0; }
300+
static inline int sme_max_vl(void) { return 0; }
301+
static inline int sme_max_virtualisable_vl(void) { return 0; }
302+
277303
#endif /* ! CONFIG_ARM64_SME */
278304

279305
/* For use by EFI runtime services calls only */

arch/arm64/include/asm/processor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ struct debug_info {
118118

119119
/*
 * Vector extension types the kernel tracks vector lengths for.
 * ARM64_VEC_MAX is not a real type: it is the number of types and is
 * used to size per-type arrays such as vl_info[].
 */
enum vec_type {
120120
ARM64_VEC_SVE = 0,
121+
ARM64_VEC_SME,
121122
ARM64_VEC_MAX,
122123
};
123124

arch/arm64/kernel/cpufeature.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,12 @@ static const struct arm64_ftr_bits ftr_zcr[] = {
581581
ARM64_FTR_END,
582582
};
583583

584+
/*
 * Feature bits for the pseudo-SMCR register: only the LEN field is
 * described. It is hidden, non-strict, uses the lower value as the safe
 * one across CPUs, and has a safe default of 0.
 */
static const struct arm64_ftr_bits ftr_smcr[] = {
585+
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
586+
SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_SIZE, 0), /* LEN */
587+
ARM64_FTR_END,
588+
};
589+
584590
/*
585591
* Common ftr bits for a 32bit register with all hidden, strict
586592
* attributes, with 4bit feature fields and a default safe value of
@@ -687,6 +693,7 @@ static const struct __ftr_reg_entry {
687693

688694
/* Op1 = 0, CRn = 1, CRm = 2 */
689695
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
696+
ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
690697

691698
/* Op1 = 1, CRn = 0, CRm = 0 */
692699
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -991,6 +998,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
991998
vec_init_vq_map(ARM64_VEC_SVE);
992999
}
9931000

1001+
if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
1002+
init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
1003+
if (IS_ENABLED(CONFIG_ARM64_SME))
1004+
vec_init_vq_map(ARM64_VEC_SME);
1005+
}
1006+
9941007
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
9951008
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
9961009

@@ -1217,6 +1230,9 @@ void update_cpu_features(int cpu,
12171230
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
12181231
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
12191232

1233+
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
1234+
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
1235+
12201236
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
12211237
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
12221238
info->reg_zcr, boot->reg_zcr);
@@ -1227,6 +1243,16 @@ void update_cpu_features(int cpu,
12271243
vec_update_vq_map(ARM64_VEC_SVE);
12281244
}
12291245

1246+
if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
1247+
taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
1248+
info->reg_smcr, boot->reg_smcr);
1249+
1250+
/* Probe vector lengths, unless we already gave up on SME */
1251+
if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) &&
1252+
!system_capabilities_finalized())
1253+
vec_update_vq_map(ARM64_VEC_SME);
1254+
}
1255+
12301256
/*
12311257
* The kernel uses the LDGM/STGM instructions and the number of tags
12321258
* they read/write depends on the GMID_EL1.BS field. Check that the
@@ -2931,6 +2957,23 @@ static void verify_sve_features(void)
29312957
/* Add checks on other ZCR bits here if necessary */
29322958
}
29332959

2960+
/*
 * Check that the SME vector lengths of the calling CPU are compatible
 * with the sanitised system-wide view.
 *
 * The CPU is killed via cpu_die_early() if its maximum LEN is smaller
 * than the sanitised SMCR_EL1 LEN, or if vec_verify_vq_map() reports a
 * problem with its set of supported lengths.
 *
 * Calls read_smcr_features(), so the SME vector length of the calling
 * CPU is clobbered.
 */
static void verify_sme_features(void)
2961+
{
2962+
u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
2963+
u64 smcr = read_smcr_features();
2964+
2965+
/* Only the LEN fields are compared; other SMCR bits are ignored here */
unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
2966+
unsigned int len = smcr & SMCR_ELx_LEN_MASK;
2967+
2968+
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
2969+
pr_crit("CPU%d: SME: vector length support mismatch\n",
2970+
smp_processor_id());
2971+
cpu_die_early();
2972+
}
2973+
2974+
/* Add checks on other SMCR bits here if necessary */
2975+
}
2976+
29342977
static void verify_hyp_capabilities(void)
29352978
{
29362979
u64 safe_mmfr1, mmfr0, mmfr1;
@@ -2983,6 +3026,9 @@ static void verify_local_cpu_capabilities(void)
29833026
if (system_supports_sve())
29843027
verify_sve_features();
29853028

3029+
if (system_supports_sme())
3030+
verify_sme_features();
3031+
29863032
if (is_hyp_mode_available())
29873033
verify_hyp_capabilities();
29883034
}
@@ -3100,6 +3146,7 @@ void __init setup_cpu_features(void)
31003146
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
31013147

31023148
sve_setup();
3149+
sme_setup();
31033150
minsigstksz_setup();
31043151

31053152
/* Advertise that we have computed the system capabilities */

arch/arm64/kernel/cpuinfo.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
421421
id_aa64pfr0_sve(info->reg_id_aa64pfr0))
422422
info->reg_zcr = read_zcr_features();
423423

424+
if (IS_ENABLED(CONFIG_ARM64_SME) &&
425+
id_aa64pfr1_sme(info->reg_id_aa64pfr1))
426+
info->reg_smcr = read_smcr_features();
427+
424428
cpuinfo_detect_icache_policy(info);
425429
}
426430

arch/arm64/kernel/entry-fpsimd.S

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,12 @@ SYM_FUNC_START(sve_flush_live)
8686
SYM_FUNC_END(sve_flush_live)
8787

8888
#endif /* CONFIG_ARM64_SVE */
89+
90+
#ifdef CONFIG_ARM64_SME
91+
92+
/*
 * unsigned int sme_get_vl(void)
 *
 * Expands the _sme_rdsvl macro with operands 0, 1 and returns.
 * NOTE(review): presumably this is "RDSVL x0, #1", leaving the current
 * streaming vector length in bytes in x0 -- confirm against the
 * _sme_rdsvl macro definition.
 */
SYM_FUNC_START(sme_get_vl)
93+
_sme_rdsvl 0, 1
94+
ret
95+
SYM_FUNC_END(sme_get_vl)
96+
97+
#endif /* CONFIG_ARM64_SME */

arch/arm64/kernel/fpsimd.c

Lines changed: 121 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
136136
.max_virtualisable_vl = SVE_VL_MIN,
137137
},
138138
#endif
139+
#ifdef CONFIG_ARM64_SME
140+
[ARM64_VEC_SME] = {
141+
.type = ARM64_VEC_SME,
142+
.name = "SME",
143+
},
144+
#endif
139145
};
140146

141147
static unsigned int vec_vl_inherit_flag(enum vec_type type)
@@ -186,6 +192,20 @@ extern void __percpu *efi_sve_state;
186192

187193
#endif /* ! CONFIG_ARM64_SVE */
188194

195+
#ifdef CONFIG_ARM64_SME
196+
197+
/* Return the default vector length recorded for the SME vector type. */
static int get_sme_default_vl(void)
198+
{
199+
return get_default_vl(ARM64_VEC_SME);
200+
}
201+
202+
/* Record @val as the default vector length for the SME vector type. */
static void set_sme_default_vl(int val)
203+
{
204+
set_default_vl(ARM64_VEC_SME, val);
205+
}
206+
207+
#endif
208+
189209
DEFINE_PER_CPU(bool, fpsimd_context_busy);
190210
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
191211

@@ -409,6 +429,8 @@ static unsigned int find_supported_vector_length(enum vec_type type,
409429

410430
if (vl > max_vl)
411431
vl = max_vl;
432+
if (vl < info->min_vl)
433+
vl = info->min_vl;
412434

413435
bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
414436
__vq_to_bit(sve_vq_from_vl(vl)));
@@ -770,7 +792,23 @@ static void vec_probe_vqs(struct vl_info *info,
770792

771793
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
772794
write_vl(info->type, vq - 1); /* self-syncing */
773-
vl = sve_get_vl();
795+
796+
switch (info->type) {
797+
case ARM64_VEC_SVE:
798+
vl = sve_get_vl();
799+
break;
800+
case ARM64_VEC_SME:
801+
vl = sme_get_vl();
802+
break;
803+
default:
804+
vl = 0;
805+
break;
806+
}
807+
808+
/* Minimum VL identified? */
809+
if (sve_vq_from_vl(vl) > vq)
810+
break;
811+
774812
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
775813
set_bit(__vq_to_bit(vq), map);
776814
}
@@ -1017,7 +1055,88 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
10171055
SYS_SMCR_EL1);
10181056
}
10191057

1020-
#endif /* CONFIG_ARM64_SVE */
1058+
/*
1059+
 * Read the pseudo-SMCR used by cpufeatures to identify the supported
1060+
 * vector length.
1061+
 *
1062+
 * Use only if SME is present.
1063+
 * This function clobbers the SME vector length.
 * It also calls sme_kernel_enable() and enters streaming mode for the
 * duration of the probe, leaving it again before returning.
 *
 * Returns the SMCR_EL1 value read back after requesting the maximum
 * LEN, with the LEN field replaced by (maximum effective VQ - 1).
1064+
 */
1065+
u64 read_smcr_features(void)
1066+
{
1067+
u64 smcr;
1068+
unsigned int vq_max;
1069+
1070+
sme_kernel_enable(NULL);
1071+
sme_smstart_sm();
1072+
1073+
/*
1074+
 * Set the maximum possible VL.
1075+
 */
1076+
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
1077+
SYS_SMCR_EL1);
1078+
1079+
smcr = read_sysreg_s(SYS_SMCR_EL1);
1080+
smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* clear LEN, keep the other fields */
1081+
/*
 * Read while still in streaming mode; sve_get_vl() is expected to
 * report the streaming (SME) vector length here -- TODO confirm.
 */
vq_max = sve_vq_from_vl(sve_get_vl());
1082+
smcr |= vq_max - 1; /* set LEN field to maximum effective value */
1083+
1084+
sme_smstop_sm();
1085+
1086+
return smcr;
1087+
}
1088+
1089+
/*
 * Boot-time SME vector length setup.
 *
 * Does nothing unless system_supports_sme(). Otherwise fills in the
 * minimum and maximum vector lengths in vl_info[ARM64_VEC_SME] from the
 * probed vq_map and the sanitised SMCR_EL1 value, chooses the default
 * vector length and logs all three.
 */
void __init sme_setup(void)
1090+
{
1091+
struct vl_info *info = &vl_info[ARM64_VEC_SME];
1092+
u64 smcr;
1093+
int min_bit;
1094+
1095+
if (!system_supports_sme())
1096+
return;
1097+
1098+
/*
1099+
 * SME doesn't require any particular vector length be
1100+
 * supported but it does require at least one. We should have
1101+
 * disabled the feature entirely while bringing up CPUs but
1102+
 * let's double check here.
 *
 * Note this only warns: setup continues even if the map is empty.
1103+
 */
1104+
WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
1105+
1106+
/* The last set bit in vq_map is taken as the smallest supported VQ */
min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
1107+
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
1108+
1109+
smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
1110+
/* LEN holds VQ - 1, hence the + 1 to get back to a VQ */
info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
1111+
1112+
/*
1113+
 * Sanity-check that the max VL we determined through CPU features
1114+
 * corresponds properly to sme_vq_map. If not, do our best:
1115+
 */
1116+
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
1117+
info->max_vl)))
1118+
info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
1119+
info->max_vl);
1120+
1121+
WARN_ON(info->min_vl > info->max_vl);
1122+
1123+
/*
1124+
 * For the default VL, pick the maximum supported value <= 32 bytes
1125+
 * (256 bits) if there is one since this is guaranteed not to
1126+
 * grow the signal frame when in streaming mode, otherwise the
1127+
 * minimum available VL will be used.
1128+
 */
1129+
set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
1130+
1131+
pr_info("SME: minimum available vector length %u bytes per vector\n",
1132+
info->min_vl);
1133+
pr_info("SME: maximum available vector length %u bytes per vector\n",
1134+
info->max_vl);
1135+
pr_info("SME: default vector length %u bytes per vector\n",
1136+
get_sme_default_vl());
1137+
}
1138+
1139+
#endif /* CONFIG_ARM64_SME */
10211140

10221141
/*
10231142
* Trapped SVE access

0 commit comments

Comments
 (0)