[API Proposal]: Arm64: FEAT_SVE: fp #94005

a74nh · 2023-10-26T09:18:06Z

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE  Category: fp
{

  /// Condensed summary of the proposed floating-point SVE API surface.
  ///
  /// Notation used below:
  ///   "T: float, double"        - one overload per listed element type.
  ///   "T: [A, B], [C, D], ..."  - one overload per pair; the first entry of each
  ///                               pair is T and the second is T2 (e.g. [double, float]
  ///                               on ConvertToDouble is Vector<double> ConvertToDouble(Vector<float>)).
  /// The trailing comment on each signature names the SVE instruction(s). The
  /// "predicated" and "MOVPRFX" tags appear to mirror the full API listing later in
  /// this issue, where the matching C intrinsics take an svbool_t predicate
  /// (_m/_x/_z forms) and list a MOVPRFX-prefixed instruction sequence.

  /// Complex add with rotate.
  /// T: float, double
  public static unsafe Vector<T> AddRotateComplex(Vector<T> op1, Vector<T> op2, ulong imm_rotation); // FCADD // predicated, MOVPRFX

  /// Add reduction (strictly-ordered).
  /// T: float, double
  public static unsafe T AddSequentialAcross(T initial, Vector<T> op); // FADDA // predicated

  /// Floating-point convert (to double).
  /// T: [double, float], [double, int], [double, long], [double, uint], [double, ulong]
  public static unsafe Vector<T> ConvertToDouble(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// Floating-point convert (to int).
  /// T: [int, float], [int, double]
  public static unsafe Vector<T> ConvertToInt32(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// Floating-point convert (to long).
  /// T: [long, float], [long, double]
  public static unsafe Vector<T> ConvertToInt64(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// Floating-point convert (to float).
  /// T: [float, double], [float, int], [float, long], [float, uint], [float, ulong]
  public static unsafe Vector<T> ConvertToSingle(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// Floating-point convert (to uint).
  /// T: [uint, float], [uint, double]
  public static unsafe Vector<T> ConvertToUInt32(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// Floating-point convert (to ulong).
  /// T: [ulong, float], [ulong, double]
  public static unsafe Vector<T> ConvertToUInt64(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// Floating-point exponential accelerator.
  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> FloatingPointExponentialAccelerator(Vector<T2> value); // FEXPA

  /// Complex multiply-add with rotate.
  /// T: float, double
  public static unsafe Vector<T> MultiplyAddRotateComplex(Vector<T> op1, Vector<T> op2, Vector<T> op3, ulong imm_rotation); // FCMLA // predicated, MOVPRFX

  /// Complex multiply-add with rotate, lane form (svcmla_lane): op3 is indexed by
  /// imm_index. Only a float overload is listed for this form.
  public static unsafe Vector<float> MultiplyAddRotateComplex(Vector<float> op1, Vector<float> op2, Vector<float> op3, ulong imm_index, ulong imm_rotation); // FCMLA // MOVPRFX

  /// Reciprocal estimate.
  /// T: float, double
  public static unsafe Vector<T> ReciprocalEstimate(Vector<T> value); // FRECPE

  /// Reciprocal exponent.
  /// T: float, double
  public static unsafe Vector<T> ReciprocalExponent(Vector<T> value); // FRECPX // predicated, MOVPRFX

  /// Reciprocal square root estimate.
  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtEstimate(Vector<T> value); // FRSQRTE

  /// Reciprocal square root step.
  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtStep(Vector<T> left, Vector<T> right); // FRSQRTS

  /// Reciprocal step.
  /// T: float, double
  public static unsafe Vector<T> ReciprocalStep(Vector<T> left, Vector<T> right); // FRECPS

  /// Round to nearest, ties away from zero.
  /// T: float, double
  public static unsafe Vector<T> RoundAwayFromZero(Vector<T> value); // FRINTA // predicated, MOVPRFX

  /// Round to nearest, ties to even.
  /// T: float, double
  public static unsafe Vector<T> RoundToNearest(Vector<T> value); // FRINTN // predicated, MOVPRFX

  /// Round towards -∞.
  /// T: float, double
  public static unsafe Vector<T> RoundToNegativeInfinity(Vector<T> value); // FRINTM // predicated, MOVPRFX

  /// Round towards +∞.
  /// T: float, double
  public static unsafe Vector<T> RoundToPositiveInfinity(Vector<T> value); // FRINTP // predicated, MOVPRFX

  /// Round towards zero.
  /// T: float, double
  public static unsafe Vector<T> RoundToZero(Vector<T> value); // FRINTZ // predicated, MOVPRFX

  /// T: [float, int], [double, long]
  public static unsafe Vector<T> Scale(Vector<T> left, Vector<T2> right); // FSCALE // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> Sqrt(Vector<T> value); // FSQRT // predicated, MOVPRFX

  /// Floating-point trigonometric multiply-add coefficient. imm3 is the immediate
  /// coefficient index (range 0 to 7 per the Details section of this issue).
  /// T: float, double
  public static unsafe Vector<T> TrigonometricMultiplyAddCoefficient(Vector<T> op1, Vector<T> op2, ulong imm3); // FTMAD // MOVPRFX

  /// Floating-point trigonometric select coefficient. right holds the integer
  /// quadrant number q for each element (see the Details section of this issue).
  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricSelectCoefficient(Vector<T> left, Vector<T2> right); // FTSSEL

  /// Floating-point trigonometric starting value: the initial value for
  /// TrigonometricMultiplyAddCoefficient. right holds the integer quadrant number q.
  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricStartingValue(Vector<T> left, Vector<T2> right); // FTSMUL

  /// total method signatures: 26


  /// Optional Entries:
  /// These Scale overloads take a scalar right operand instead of a vector.

  public static unsafe Vector<float> Scale(Vector<float> left, int right); // FSCALE // predicated, MOVPRFX

  public static unsafe Vector<double> Scale(Vector<double> left, long right); // FSCALE // predicated, MOVPRFX

  /// total optional method signatures: 2

}

Details

TrigonometricMultiplyAddCoefficient
Floating-point trigonometric multiply-add coefficient

Calculates the series terms for either sin(x) or cos(x), where the argument x has been adjusted to be in the range -π/4 < x ≤ π/4.

To calculate the series terms of sin(x) and cos(x) the initial source operands should be zero in the first source vector and x² in the second source vector. The operation is then executed eight times to calculate the sum of eight series terms, which gives a result of sufficient precision.

The method multiplies each element of the first source vector by the absolute value of the corresponding element of the second source vector and performs a fused addition of each product with a value obtained from a table of hard-wired coefficients, and places the results destructively in the first source vector.

The coefficients are different for sin(x) and cos(x), and are selected by a combination of the sign bit in the second source element and an immediate index in the range 0 to 7.

See https://docsmirror.github.io/A64/2023-06/ftmad_z_zzi.html for the full coefficient tables.

TrigonometricSelectCoefficient
Floating-point trigonometric select coefficient

Selects the coefficient for the final multiplication in the polynomial series approximation. The instruction places the value 1.0 or a copy of the first source vector element in the destination element, depending on bit 0 of the quadrant number q held in the corresponding element of the second source vector. The sign bit of the destination element is copied from bit 1 of the corresponding value of q.

To compute sin(x) or cos(x) the instruction is executed with elements of the first source vector set to x, adjusted to be in the range -π/4 < x ≤ π/4.

The elements of the second source vector hold the corresponding value of the quadrant number q as an integer, not a floating-point value. The value q satisfies the relationship (2q-1) × π/4 < x ≤ (2q+1) × π/4.

TrigonometricStartingValue
Floating-point trigonometric starting value

Calculates the initial value for TrigonometricMultiplyAddCoefficient. The method squares each element in the first source vector and then sets the sign bit to a copy of bit 0 of the corresponding element in the second source vector, and places the results in the destination vector.

To compute sin(x) or cos(x) the instruction is executed with elements of the first source vector set to x, adjusted to be in the range -π/4 < x ≤ π/4.

The elements of the second source vector hold the corresponding value of the quadrant number q as an integer, not a floating-point value. The value q satisfies the relationship (2q-1) × π/4 < x ≤ (2q+1) × π/4.

The text was updated successfully, but these errors were encountered:

ghost · 2023-10-26T09:18:13Z

Tagging subscribers to this area: @dotnet/area-system-numerics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
/// NOTE: this copy of the summary (quoted under "Issue Details") does not match the
/// summary at the top of the issue. Differences visible in this thread:
///   - FADDA is AddOrderedReduce here, AddSequentialAcross at the top.
///   - FSCALE is AdjustExponent here, Scale at the top.
///   - FCADD / FCMLA are ComplexAddRotate / ComplexMultiplyAddRotate here,
///     AddRotateComplex / MultiplyAddRotateComplex at the top.
///   - A single FloatingPointConvert covers every conversion pair here; the top
///     summary splits them into ConvertToDouble/Int32/Int64/Single/UInt32/UInt64.
///   - ReciprocalSquareRoot* / SquareRoot here, ReciprocalSqrt* / Sqrt at the top.
///   - The rounding methods use different names, and the FRINTX and FRINTI entries
///     here have no counterpart in the top summary.
///   - Instruction tags are written "(MOVPRFX)" here rather than "// predicated, MOVPRFX".
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE  Category: fp
{
  /// T: float, double
  public static unsafe T AddOrderedReduce(T initial, Vector<T> op); // FADDA

  /// T: [float, int], [double, long]
  public static unsafe Vector<T> AdjustExponent(Vector<T> left, Vector<T2> right); // FSCALE (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> ComplexAddRotate(Vector<T> op1, Vector<T> op2, ulong imm_rotation); // FCADD (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> ComplexMultiplyAddRotate(Vector<T> op1, Vector<T> op2, Vector<T> op3, ulong imm_rotation); // FCMLA (MOVPRFX)

  /// Lane form taking imm_index; no instruction tag is given on this line in this
  /// snapshot (the equivalent entry at the top of the issue is tagged FCMLA).
  public static unsafe Vector<float> ComplexMultiplyAddRotate(Vector<float> op1, Vector<float> op2, Vector<float> op3, ulong imm_index, ulong imm_rotation);

  /// Each pair below is [T, T2]: the result element type followed by the source element type.
  /// T: [float, double], [double, float], [int, float], [int, double], [long, float], [long, double], [uint, float], [uint, double], [ulong, float], [ulong, double], [float, int], [float, long], [float, uint], [float, ulong], [double, int], [double, long], [double, uint], [double, ulong]
  public static unsafe Vector<T> FloatingPointConvert(Vector<T2> value); // FCVT or FCVTZS or FCVTZU or SCVTF or UCVTF (MOVPRFX)

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> FloatingPointExponentialAccelerator(Vector<T2> value); // FEXPA

  /// T: float, double
  public static unsafe Vector<T> ReciprocalEstimate(Vector<T> value); // FRECPE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalExponent(Vector<T> value); // FRECPX (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSquareRootEstimate(Vector<T> value); // FRSQRTE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSquareRootStep(Vector<T> left, Vector<T> right); // FRSQRTS

  /// T: float, double
  public static unsafe Vector<T> ReciprocalStep(Vector<T> left, Vector<T> right); // FRECPS

  /// T: float, double
  public static unsafe Vector<T> RoundToNearestTiesAwayFromZero(Vector<T> value); // FRINTA (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundToNearestTiesToEven(Vector<T> value); // FRINTN (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundTowardsWithMergeTowardMinusInfinity(Vector<T> value); // FRINTM (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundTowardsWithMergeTowardPlusInfinity(Vector<T> value); // FRINTP (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundTowardsZero(Vector<T> value); // FRINTZ (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundUsingCurrentRoundingModeExact(Vector<T> value); // FRINTX (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> RoundUsingCurrentRoundingModeInexact(Vector<T> value); // FRINTI (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> SquareRoot(Vector<T> value); // FSQRT (MOVPRFX)

  /// T: float, double
  public static unsafe Vector<T> TrigonometricMultiplyAddCoefficient(Vector<T> op1, Vector<T> op2, ulong imm3); // FTMAD (MOVPRFX)

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricSelectCoefficient(Vector<T> left, Vector<T2> right); // FTSSEL

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricStartingValue(Vector<T> left, Vector<T2> right); // FTSMUL

  /// total method signatures: 23
}

Author:	a74nh
Assignees:	-
Labels:	`area-System.Numerics`
Milestone:	-

a74nh · 2023-10-26T09:18:18Z

/// Full API
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE  Category: fp
{
    /// AddRotateComplex : Complex add with rotate

    /// svfloat32_t svcadd[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) : "FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCADD Zresult.S, Pg/M, Zresult.S, Zop2.S, #imm_rotation"
    /// svfloat32_t svcadd[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) : "FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCADD Zresult.S, Pg/M, Zresult.S, Zop2.S, #imm_rotation"
    /// svfloat32_t svcadd[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) : "MOVPRFX Zresult.S, Pg/Z, Zop1.S; FCADD Zresult.S, Pg/M, Zresult.S, Zop2.S, #imm_rotation"
  public static unsafe Vector<float> AddRotateComplex(Vector<float> op1, Vector<float> op2, ulong imm_rotation);

    /// svfloat64_t svcadd[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) : "FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCADD Zresult.D, Pg/M, Zresult.D, Zop2.D, #imm_rotation"
    /// svfloat64_t svcadd[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) : "FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCADD Zresult.D, Pg/M, Zresult.D, Zop2.D, #imm_rotation"
    /// svfloat64_t svcadd[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) : "MOVPRFX Zresult.D, Pg/Z, Zop1.D; FCADD Zresult.D, Pg/M, Zresult.D, Zop2.D, #imm_rotation"
  public static unsafe Vector<double> AddRotateComplex(Vector<double> op1, Vector<double> op2, ulong imm_rotation);


    /// AddSequentialAcross : Add reduction (strictly-ordered)

    /// float32_t svadda[_f32](svbool_t pg, float32_t initial, svfloat32_t op) : "FADDA Stied, Pg, Stied, Zop.S"
  public static unsafe float AddSequentialAcross(float initial, Vector<float> op);

    /// float64_t svadda[_f64](svbool_t pg, float64_t initial, svfloat64_t op) : "FADDA Dtied, Pg, Dtied, Zop.D"
  public static unsafe double AddSequentialAcross(double initial, Vector<double> op);


    /// ConvertToDouble : Floating-point convert

    /// svfloat64_t svcvt_f64[_f32]_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op) : "FCVT Ztied.D, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FCVT Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_f32]_x(svbool_t pg, svfloat32_t op) : "FCVT Ztied.D, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FCVT Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVT Zresult.D, Pg/M, Zop.S"
  public static unsafe Vector<double> ConvertToDouble(Vector<float> value);

    /// svfloat64_t svcvt_f64[_s32]_m(svfloat64_t inactive, svbool_t pg, svint32_t op) : "SCVTF Ztied.D, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; SCVTF Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_s32]_x(svbool_t pg, svint32_t op) : "SCVTF Ztied.D, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; SCVTF Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_s32]_z(svbool_t pg, svint32_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; SCVTF Zresult.D, Pg/M, Zop.S"
  public static unsafe Vector<double> ConvertToDouble(Vector<int> value);

    /// svfloat64_t svcvt_f64[_s64]_m(svfloat64_t inactive, svbool_t pg, svint64_t op) : "SCVTF Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; SCVTF Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svcvt_f64[_s64]_x(svbool_t pg, svint64_t op) : "SCVTF Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; SCVTF Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svcvt_f64[_s64]_z(svbool_t pg, svint64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; SCVTF Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> ConvertToDouble(Vector<long> value);

    /// svfloat64_t svcvt_f64[_u32]_m(svfloat64_t inactive, svbool_t pg, svuint32_t op) : "UCVTF Ztied.D, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; UCVTF Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_u32]_x(svbool_t pg, svuint32_t op) : "UCVTF Ztied.D, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; UCVTF Zresult.D, Pg/M, Zop.S"
    /// svfloat64_t svcvt_f64[_u32]_z(svbool_t pg, svuint32_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; UCVTF Zresult.D, Pg/M, Zop.S"
  public static unsafe Vector<double> ConvertToDouble(Vector<uint> value);

    /// svfloat64_t svcvt_f64[_u64]_m(svfloat64_t inactive, svbool_t pg, svuint64_t op) : "UCVTF Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; UCVTF Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svcvt_f64[_u64]_x(svbool_t pg, svuint64_t op) : "UCVTF Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; UCVTF Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svcvt_f64[_u64]_z(svbool_t pg, svuint64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; UCVTF Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> ConvertToDouble(Vector<ulong> value);


    /// ConvertToInt32 : Floating-point convert

    /// svint32_t svcvt_s32[_f32]_m(svint32_t inactive, svbool_t pg, svfloat32_t op) : "FCVTZS Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FCVTZS Zresult.S, Pg/M, Zop.S"
    /// svint32_t svcvt_s32[_f32]_x(svbool_t pg, svfloat32_t op) : "FCVTZS Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FCVTZS Zresult.S, Pg/M, Zop.S"
    /// svint32_t svcvt_s32[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FCVTZS Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<int> ConvertToInt32(Vector<float> value);

    /// svint32_t svcvt_s32[_f64]_m(svint32_t inactive, svbool_t pg, svfloat64_t op) : "FCVTZS Ztied.S, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FCVTZS Zresult.S, Pg/M, Zop.D"
    /// svint32_t svcvt_s32[_f64]_x(svbool_t pg, svfloat64_t op) : "FCVTZS Ztied.S, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FCVTZS Zresult.S, Pg/M, Zop.D"
    /// svint32_t svcvt_s32[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZS Zresult.S, Pg/M, Zop.D"
  public static unsafe Vector<int> ConvertToInt32(Vector<double> value);


    /// ConvertToInt64 : Floating-point convert

    /// svint64_t svcvt_s64[_f32]_m(svint64_t inactive, svbool_t pg, svfloat32_t op) : "FCVTZS Ztied.D, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FCVTZS Zresult.D, Pg/M, Zop.S"
    /// svint64_t svcvt_s64[_f32]_x(svbool_t pg, svfloat32_t op) : "FCVTZS Ztied.D, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FCVTZS Zresult.D, Pg/M, Zop.S"
    /// svint64_t svcvt_s64[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZS Zresult.D, Pg/M, Zop.S"
  public static unsafe Vector<long> ConvertToInt64(Vector<float> value);

    /// svint64_t svcvt_s64[_f64]_m(svint64_t inactive, svbool_t pg, svfloat64_t op) : "FCVTZS Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FCVTZS Zresult.D, Pg/M, Zop.D"
    /// svint64_t svcvt_s64[_f64]_x(svbool_t pg, svfloat64_t op) : "FCVTZS Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FCVTZS Zresult.D, Pg/M, Zop.D"
    /// svint64_t svcvt_s64[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZS Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<long> ConvertToInt64(Vector<double> value);


    /// ConvertToSingle : Floating-point convert

    /// svfloat32_t svcvt_f32[_f64]_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op) : "FCVT Ztied.S, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FCVT Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_f64]_x(svbool_t pg, svfloat64_t op) : "FCVT Ztied.S, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FCVT Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVT Zresult.S, Pg/M, Zop.D"
  public static unsafe Vector<float> ConvertToSingle(Vector<double> value);

    /// svfloat32_t svcvt_f32[_s32]_m(svfloat32_t inactive, svbool_t pg, svint32_t op) : "SCVTF Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; SCVTF Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svcvt_f32[_s32]_x(svbool_t pg, svint32_t op) : "SCVTF Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; SCVTF Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svcvt_f32[_s32]_z(svbool_t pg, svint32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; SCVTF Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> ConvertToSingle(Vector<int> value);

    /// svfloat32_t svcvt_f32[_s64]_m(svfloat32_t inactive, svbool_t pg, svint64_t op) : "SCVTF Ztied.S, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; SCVTF Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_s64]_x(svbool_t pg, svint64_t op) : "SCVTF Ztied.S, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; SCVTF Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_s64]_z(svbool_t pg, svint64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; SCVTF Zresult.S, Pg/M, Zop.D"
  public static unsafe Vector<float> ConvertToSingle(Vector<long> value);

    /// svfloat32_t svcvt_f32[_u32]_m(svfloat32_t inactive, svbool_t pg, svuint32_t op) : "UCVTF Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; UCVTF Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svcvt_f32[_u32]_x(svbool_t pg, svuint32_t op) : "UCVTF Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; UCVTF Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svcvt_f32[_u32]_z(svbool_t pg, svuint32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; UCVTF Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> ConvertToSingle(Vector<uint> value);

    /// svfloat32_t svcvt_f32[_u64]_m(svfloat32_t inactive, svbool_t pg, svuint64_t op) : "UCVTF Ztied.S, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; UCVTF Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_u64]_x(svbool_t pg, svuint64_t op) : "UCVTF Ztied.S, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; UCVTF Zresult.S, Pg/M, Zop.D"
    /// svfloat32_t svcvt_f32[_u64]_z(svbool_t pg, svuint64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; UCVTF Zresult.S, Pg/M, Zop.D"
  public static unsafe Vector<float> ConvertToSingle(Vector<ulong> value);


    /// ConvertToUInt32 : Floating-point convert

    /// svuint32_t svcvt_u32[_f32]_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) : "FCVTZU Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FCVTZU Zresult.S, Pg/M, Zop.S"
    /// svuint32_t svcvt_u32[_f32]_x(svbool_t pg, svfloat32_t op) : "FCVTZU Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FCVTZU Zresult.S, Pg/M, Zop.S"
    /// svuint32_t svcvt_u32[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FCVTZU Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<uint> ConvertToUInt32(Vector<float> value);

    /// svuint32_t svcvt_u32[_f64]_m(svuint32_t inactive, svbool_t pg, svfloat64_t op) : "FCVTZU Ztied.S, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FCVTZU Zresult.S, Pg/M, Zop.D"
    /// svuint32_t svcvt_u32[_f64]_x(svbool_t pg, svfloat64_t op) : "FCVTZU Ztied.S, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FCVTZU Zresult.S, Pg/M, Zop.D"
    /// svuint32_t svcvt_u32[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZU Zresult.S, Pg/M, Zop.D"
  public static unsafe Vector<uint> ConvertToUInt32(Vector<double> value);


    /// ConvertToUInt64 : Floating-point convert

    /// svuint64_t svcvt_u64[_f32]_m(svuint64_t inactive, svbool_t pg, svfloat32_t op) : "FCVTZU Ztied.D, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FCVTZU Zresult.D, Pg/M, Zop.S"
    /// svuint64_t svcvt_u64[_f32]_x(svbool_t pg, svfloat32_t op) : "FCVTZU Ztied.D, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FCVTZU Zresult.D, Pg/M, Zop.S"
    /// svuint64_t svcvt_u64[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZU Zresult.D, Pg/M, Zop.S"
  public static unsafe Vector<ulong> ConvertToUInt64(Vector<float> value);

    /// svuint64_t svcvt_u64[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) : "FCVTZU Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FCVTZU Zresult.D, Pg/M, Zop.D"
    /// svuint64_t svcvt_u64[_f64]_x(svbool_t pg, svfloat64_t op) : "FCVTZU Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FCVTZU Zresult.D, Pg/M, Zop.D"
    /// svuint64_t svcvt_u64[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FCVTZU Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<ulong> ConvertToUInt64(Vector<double> value);


    /// FloatingPointExponentialAccelerator : Floating-point exponential accelerator

    /// svfloat32_t svexpa[_f32](svuint32_t op) : "FEXPA Zresult.S, Zop.S"
  public static unsafe Vector<float> FloatingPointExponentialAccelerator(Vector<uint> value);

    /// svfloat64_t svexpa[_f64](svuint64_t op) : "FEXPA Zresult.D, Zop.D"
  public static unsafe Vector<double> FloatingPointExponentialAccelerator(Vector<ulong> value);


    /// MultiplyAddRotateComplex : Complex multiply-add with rotate

    /// svfloat32_t svcmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) : "FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCMLA Zresult.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation"
    /// svfloat32_t svcmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) : "FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCMLA Zresult.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation"
    /// svfloat32_t svcmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) : "MOVPRFX Zresult.S, Pg/Z, Zop1.S; FCMLA Zresult.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation"
  public static unsafe Vector<float> MultiplyAddRotateComplex(Vector<float> op1, Vector<float> op2, Vector<float> op3, ulong imm_rotation);

    /// svfloat64_t svcmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) : "FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCMLA Zresult.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation"
    /// svfloat64_t svcmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) : "FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation" or "MOVPRFX Zresult, Zop1; FCMLA Zresult.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation"
    /// svfloat64_t svcmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) : "MOVPRFX Zresult.D, Pg/Z, Zop1.D; FCMLA Zresult.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation"
  public static unsafe Vector<double> MultiplyAddRotateComplex(Vector<double> op1, Vector<double> op2, Vector<double> op3, ulong imm_rotation);

    /// svfloat32_t svcmla_lane[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index, uint64_t imm_rotation) : "FCMLA Ztied1.S, Zop2.S, Zop3.S[imm_index], #imm_rotation" or "MOVPRFX Zresult, Zop1; FCMLA Zresult.S, Zop2.S, Zop3.S[imm_index], #imm_rotation"
  public static unsafe Vector<float> MultiplyAddRotateComplex(Vector<float> op1, Vector<float> op2, Vector<float> op3, ulong imm_index, ulong imm_rotation);


    /// ReciprocalEstimate : Reciprocal estimate

    /// svfloat32_t svrecpe[_f32](svfloat32_t op) : "FRECPE Zresult.S, Zop.S"
  public static unsafe Vector<float> ReciprocalEstimate(Vector<float> value);

    /// svfloat64_t svrecpe[_f64](svfloat64_t op) : "FRECPE Zresult.D, Zop.D"
  public static unsafe Vector<double> ReciprocalEstimate(Vector<double> value);


    /// ReciprocalExponent : Reciprocal exponent

    /// svfloat32_t svrecpx[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRECPX Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRECPX Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrecpx[_f32]_x(svbool_t pg, svfloat32_t op) : "FRECPX Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRECPX Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrecpx[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRECPX Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> ReciprocalExponent(Vector<float> value);

    /// svfloat64_t svrecpx[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRECPX Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRECPX Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrecpx[_f64]_x(svbool_t pg, svfloat64_t op) : "FRECPX Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRECPX Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrecpx[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRECPX Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> ReciprocalExponent(Vector<double> value);


    /// ReciprocalSqrtEstimate : Reciprocal square root estimate

    /// svfloat32_t svrsqrte[_f32](svfloat32_t op) : "FRSQRTE Zresult.S, Zop.S"
  public static unsafe Vector<float> ReciprocalSqrtEstimate(Vector<float> value);

    /// svfloat64_t svrsqrte[_f64](svfloat64_t op) : "FRSQRTE Zresult.D, Zop.D"
  public static unsafe Vector<double> ReciprocalSqrtEstimate(Vector<double> value);


    /// ReciprocalSqrtStep : Reciprocal square root step

    /// svfloat32_t svrsqrts[_f32](svfloat32_t op1, svfloat32_t op2) : "FRSQRTS Zresult.S, Zop1.S, Zop2.S"
  public static unsafe Vector<float> ReciprocalSqrtStep(Vector<float> left, Vector<float> right);

    /// svfloat64_t svrsqrts[_f64](svfloat64_t op1, svfloat64_t op2) : "FRSQRTS Zresult.D, Zop1.D, Zop2.D"
  public static unsafe Vector<double> ReciprocalSqrtStep(Vector<double> left, Vector<double> right);


    /// ReciprocalStep : Reciprocal step

    /// svfloat32_t svrecps[_f32](svfloat32_t op1, svfloat32_t op2) : "FRECPS Zresult.S, Zop1.S, Zop2.S"
  public static unsafe Vector<float> ReciprocalStep(Vector<float> left, Vector<float> right);

    /// svfloat64_t svrecps[_f64](svfloat64_t op1, svfloat64_t op2) : "FRECPS Zresult.D, Zop1.D, Zop2.D"
  public static unsafe Vector<double> ReciprocalStep(Vector<double> left, Vector<double> right);


    /// RoundAwayFromZero : Round to nearest, ties away from zero

    /// svfloat32_t svrinta[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRINTA Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRINTA Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrinta[_f32]_x(svbool_t pg, svfloat32_t op) : "FRINTA Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRINTA Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrinta[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRINTA Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> RoundAwayFromZero(Vector<float> value);

    /// svfloat64_t svrinta[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRINTA Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRINTA Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrinta[_f64]_x(svbool_t pg, svfloat64_t op) : "FRINTA Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRINTA Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrinta[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRINTA Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> RoundAwayFromZero(Vector<double> value);


    /// RoundToNearest : Round to nearest, ties to even

    /// svfloat32_t svrintn[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRINTN Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRINTN Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintn[_f32]_x(svbool_t pg, svfloat32_t op) : "FRINTN Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRINTN Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintn[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRINTN Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> RoundToNearest(Vector<float> value);

    /// svfloat64_t svrintn[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRINTN Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRINTN Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintn[_f64]_x(svbool_t pg, svfloat64_t op) : "FRINTN Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRINTN Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintn[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRINTN Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> RoundToNearest(Vector<double> value);


    /// RoundToNegativeInfinity : Round towards -∞

    /// svfloat32_t svrintm[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRINTM Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRINTM Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintm[_f32]_x(svbool_t pg, svfloat32_t op) : "FRINTM Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRINTM Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintm[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRINTM Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> RoundToNegativeInfinity(Vector<float> value);

    /// svfloat64_t svrintm[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRINTM Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRINTM Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintm[_f64]_x(svbool_t pg, svfloat64_t op) : "FRINTM Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRINTM Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintm[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRINTM Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> RoundToNegativeInfinity(Vector<double> value);


    /// RoundToPositiveInfinity : Round towards +∞

    /// svfloat32_t svrintp[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRINTP Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRINTP Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintp[_f32]_x(svbool_t pg, svfloat32_t op) : "FRINTP Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRINTP Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintp[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRINTP Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> RoundToPositiveInfinity(Vector<float> value);

    /// svfloat64_t svrintp[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRINTP Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRINTP Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintp[_f64]_x(svbool_t pg, svfloat64_t op) : "FRINTP Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRINTP Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintp[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRINTP Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> RoundToPositiveInfinity(Vector<double> value);


    /// RoundToZero : Round towards zero

    /// svfloat32_t svrintz[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FRINTZ Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FRINTZ Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintz[_f32]_x(svbool_t pg, svfloat32_t op) : "FRINTZ Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FRINTZ Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svrintz[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FRINTZ Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> RoundToZero(Vector<float> value);

    /// svfloat64_t svrintz[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FRINTZ Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FRINTZ Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintz[_f64]_x(svbool_t pg, svfloat64_t op) : "FRINTZ Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FRINTZ Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svrintz[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FRINTZ Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> RoundToZero(Vector<double> value);


    /// Scale : Adjust exponent

    /// svfloat32_t svscale[_f32]_m(svbool_t pg, svfloat32_t op1, svint32_t op2) : "FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S" or "MOVPRFX Zresult, Zop1; FSCALE Zresult.S, Pg/M, Zresult.S, Zop2.S"
    /// svfloat32_t svscale[_f32]_x(svbool_t pg, svfloat32_t op1, svint32_t op2) : "FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S" or "MOVPRFX Zresult, Zop1; FSCALE Zresult.S, Pg/M, Zresult.S, Zop2.S"
    /// svfloat32_t svscale[_f32]_z(svbool_t pg, svfloat32_t op1, svint32_t op2) : "MOVPRFX Zresult.S, Pg/Z, Zop1.S; FSCALE Zresult.S, Pg/M, Zresult.S, Zop2.S"
  public static unsafe Vector<float> Scale(Vector<float> left, Vector<int> right);

    /// svfloat64_t svscale[_f64]_m(svbool_t pg, svfloat64_t op1, svint64_t op2) : "FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D" or "MOVPRFX Zresult, Zop1; FSCALE Zresult.D, Pg/M, Zresult.D, Zop2.D"
    /// svfloat64_t svscale[_f64]_x(svbool_t pg, svfloat64_t op1, svint64_t op2) : "FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D" or "MOVPRFX Zresult, Zop1; FSCALE Zresult.D, Pg/M, Zresult.D, Zop2.D"
    /// svfloat64_t svscale[_f64]_z(svbool_t pg, svfloat64_t op1, svint64_t op2) : "MOVPRFX Zresult.D, Pg/Z, Zop1.D; FSCALE Zresult.D, Pg/M, Zresult.D, Zop2.D"
  public static unsafe Vector<double> Scale(Vector<double> left, Vector<long> right);


    /// Sqrt : Square root

    /// svfloat32_t svsqrt[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) : "FSQRT Ztied.S, Pg/M, Zop.S" or "MOVPRFX Zresult, Zinactive; FSQRT Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svsqrt[_f32]_x(svbool_t pg, svfloat32_t op) : "FSQRT Ztied.S, Pg/M, Ztied.S" or "MOVPRFX Zresult, Zop; FSQRT Zresult.S, Pg/M, Zop.S"
    /// svfloat32_t svsqrt[_f32]_z(svbool_t pg, svfloat32_t op) : "MOVPRFX Zresult.S, Pg/Z, Zop.S; FSQRT Zresult.S, Pg/M, Zop.S"
  public static unsafe Vector<float> Sqrt(Vector<float> value);

    /// svfloat64_t svsqrt[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) : "FSQRT Ztied.D, Pg/M, Zop.D" or "MOVPRFX Zresult, Zinactive; FSQRT Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svsqrt[_f64]_x(svbool_t pg, svfloat64_t op) : "FSQRT Ztied.D, Pg/M, Ztied.D" or "MOVPRFX Zresult, Zop; FSQRT Zresult.D, Pg/M, Zop.D"
    /// svfloat64_t svsqrt[_f64]_z(svbool_t pg, svfloat64_t op) : "MOVPRFX Zresult.D, Pg/Z, Zop.D; FSQRT Zresult.D, Pg/M, Zop.D"
  public static unsafe Vector<double> Sqrt(Vector<double> value);


    /// TrigonometricMultiplyAddCoefficient : Trigonometric multiply-add coefficient

    /// svfloat32_t svtmad[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm3) : "FTMAD Ztied1.S, Ztied1.S, Zop2.S, #imm3" or "MOVPRFX Zresult, Zop1; FTMAD Zresult.S, Zresult.S, Zop2.S, #imm3"
  public static unsafe Vector<float> TrigonometricMultiplyAddCoefficient(Vector<float> op1, Vector<float> op2, ulong imm3);

    /// svfloat64_t svtmad[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm3) : "FTMAD Ztied1.D, Ztied1.D, Zop2.D, #imm3" or "MOVPRFX Zresult, Zop1; FTMAD Zresult.D, Zresult.D, Zop2.D, #imm3"
  public static unsafe Vector<double> TrigonometricMultiplyAddCoefficient(Vector<double> op1, Vector<double> op2, ulong imm3);


    /// TrigonometricSelectCoefficient : Trigonometric select coefficient

    /// svfloat32_t svtssel[_f32](svfloat32_t op1, svuint32_t op2) : "FTSSEL Zresult.S, Zop1.S, Zop2.S"
  public static unsafe Vector<float> TrigonometricSelectCoefficient(Vector<float> left, Vector<uint> right);

    /// svfloat64_t svtssel[_f64](svfloat64_t op1, svuint64_t op2) : "FTSSEL Zresult.D, Zop1.D, Zop2.D"
  public static unsafe Vector<double> TrigonometricSelectCoefficient(Vector<double> left, Vector<ulong> right);


    /// TrigonometricStartingValue : Trigonometric starting value

    /// svfloat32_t svtsmul[_f32](svfloat32_t op1, svuint32_t op2) : "FTSMUL Zresult.S, Zop1.S, Zop2.S"
  public static unsafe Vector<float> TrigonometricStartingValue(Vector<float> left, Vector<uint> right);

    /// svfloat64_t svtsmul[_f64](svfloat64_t op1, svuint64_t op2) : "FTSMUL Zresult.D, Zop1.D, Zop2.D"
  public static unsafe Vector<double> TrigonometricStartingValue(Vector<double> left, Vector<ulong> right);


  /// total method signatures: 57
  /// total method names:      27
}

a74nh · 2023-10-26T09:18:33Z

  /// Optional Entries:
  ///   public static unsafe Vector<float> Scale(Vector<float> left, int right); // svscale[_n_f32]_m or svscale[_n_f32]_x or svscale[_n_f32]_z
  ///   public static unsafe Vector<double> Scale(Vector<double> left, long right); // svscale[_n_f64]_m or svscale[_n_f64]_x or svscale[_n_f64]_z
  ///   Total Maybe: 2

  /// Rejected:
  ///   public static unsafe Vector<float> RoundUsingCurrentRoundingModeExact(Vector<float> value); // svrintx[_f32]_m or svrintx[_f32]_x or svrintx[_f32]_z
  ///   public static unsafe Vector<double> RoundUsingCurrentRoundingModeExact(Vector<double> value); // svrintx[_f64]_m or svrintx[_f64]_x or svrintx[_f64]_z
  ///   public static unsafe Vector<float> RoundUsingCurrentRoundingModeInexact(Vector<float> value); // svrinti[_f32]_m or svrinti[_f32]_x or svrinti[_f32]_z
  ///   public static unsafe Vector<double> RoundUsingCurrentRoundingModeInexact(Vector<double> value); // svrinti[_f64]_m or svrinti[_f64]_x or svrinti[_f64]_z
  ///   Total Rejected: 4

  /// Total ACLE covered across API:      151

a74nh · 2023-10-26T09:19:37Z

This contributes to #93095

It covers instructions in FEAT_SVE related to floating point operations.

This list was auto generated from the C ACLE for SVE, and is in three parts:

1. The method list reduced down to Vector versions. All possible variants of T are given above the method.
2. The complete list of all methods. The corresponding ACLE methods and SVE instructions are given above the method.
3. All rejected ACLE methods. These are methods we have agreed do not need to be included in C#.
Where possible, existing C# naming conventions have been matched.

Many of the C functions include predicate argument(s), of type svbool_t as the first argument. These are missing from the C# method. It is expected that the Jit will create predicates where required, or combine with uses of conditionalSelect(). For more discussion see #88140 comment.

tannergooding · 2023-10-26T17:39:41Z

*Reduce should be *Across to match the naming in AdvSimd.

FADDA is just doing e0 + e1 + .. eN rather than doing the (e0 + e1) + (e2 + e3) + ... approach that AddAcross does, correct? I wonder if Sequential or another name might be clearer given that Ordered has a common alternative meaning to IEEE 754 floating-point types.

For AdjustExponent, this is basically a hardware version of ScaleB (which we expose as float.ScaleB). For Avx512F we just called it Scale (since the B, which is 2 for binary floats, is implied). This then matches the hardware instruction name as well.

We probably want to discuss in API review whether ComplexAddRotate or AddRotateComplex is better terminology here. I believe we typically prefer the type at the end.

For FloatingPointConvert the AdvSimd APIs use names like ConvertToInt32RoundAwayFromZero or ConvertToDouble, both so we can disambiguate the return type and so the semantic is clear.

For SquareRoot, we prefer Sqrt to match the name of the primary Math API. Same for ReciprocalSqrtEstimate

For RoundToNearestTiesToEven, we just called it RoundToNearest in AdvSimd as that is the default rounding mode for IEEE 754 floating-points. Similarly we just used names like RoundAwayFromZero, RoundToNegativeInfinity, RoundToPositiveInfinity, and RoundToZero.

We did not expose FRINTX for AdvSimd because .NET doesn't support floating-point exceptions being enabled. We similarly didn't expose FRINTI because we don't support changing the global floating-point rounding mode.

Trigonometric* are new concepts and would likely benefit from a small explanation of what they do and how they benefit APIs like Cos/Sin, etc.

ghost · 2023-10-26T17:39:54Z

This issue has been marked needs-author-action and may be missing some important information.

a74nh · 2023-10-27T13:50:57Z

> FADDA is just doing e0 + e1 + .. eN rather than doing the (e0 + e1) + (e2 + e3) + ... approach that AddAcross does, correct? I wonder if Sequential or another name might be clearer given that Ordered has a common alternative meaning to IEEE 754 floating-point types.

SADDV/UADDV doesn't specify how the elements are added together.

FADDA is the only version of add across for fp. Maybe for the API the ordered distinction isn't required and it could just be a variant of AddAcross().
Otherwise, I'm happy with sequential.

> For AdjustExponent, this is basically a hardware version of ScaleB (which we expose as float.ScaleB). For Avx512F we just called it Scale (since the B, which is 2 for binary floats, is implied). This then matches the hardware instruction name as well.

Done.

> We probably want to discuss in API review whether ComplexAddRotate or AddRotateComplex is better terminology here. I believe we typically prefer the type at the end.

Switched for now.

> For FloatingPointConvert the AdvSimd APIs use names like ConvertToInt32RoundAwayFromZero or ConvertToDouble, both so we can disambiguate the return type and so the semantic is clear.

Done. Note that the API now shows:

  /// T: [long, float], [long, double]
  public static unsafe Vector<T> ConvertToInt64(Vector<T2> value); // FCVTZS (MOVPRFX)

Ideally it would just be:

  /// T: float, double
  public static unsafe Vector<long> ConvertToInt64(Vector<T> value); // FCVTZS (MOVPRFX)

But that's scripting limitations I'd rather not fix for now.

Note, I've used ConvertToSignedInt32() etc for the signed variants.

> For SquareRoot, we prefer Sqrt to match the name of the primary Math API. Same for ReciprocalSqrtEstimate

AdvSimd has ReciprocalSquareRootEstimate.
Changed for SVE.

> For RoundToNearestTiesToEven, we just called it RoundToNearest in AdvSimd as that is the default rounding mode for IEEE 754 floating-points. Similarly we just used names like RoundAwayFromZero, RoundToNegativeInfinity, RoundToPositiveInfinity, and RoundToZero.

Done.

> We did not expose FRINTX for AdvSimd because .NET doesn't support floating-point exceptions being enabled. We similarly didn't expose FRINTI because we don't support changing the global floating-point rounding mode.

Done.

> Trigonometric* are new concepts and would likely benefit from a small explanation of what they do and how they benefit APIs like Cos/Sin, etc.

Done.

a74nh · 2023-10-27T14:07:22Z

> Trigonometric* are new concepts and would likely benefit from a small explanation of what they do and how they benefit APIs like Cos/Sin, etc.

I've included a description, mostly taken from the architecture manual. But, I don't really know much about this group at all, so can't say how/why you would use them. Curiously, the ACLE document avoids a description and just gives a link to the architecture manual.

tannergooding · 2023-10-27T17:55:18Z

> FADDA is the only version of add across for fp

Ah, I was probably misremembering then. We implement the cross platform Sum, for floating-point, using pairwise logic, because it reliably exists across platforms. Naturally the order doesn't matter for integers.

> Note, I've used ConvertToSignedInt32() etc for the signed variants.

Int32 (signed) vs UInt32 (unsigned) would be the "preferred" terminology here. We have the following in .NET:

| Bitwidth | Signed Type | Unsigned Type |
|----------|-------------|---------------|
| 8        | SByte       | Byte          |
| 16       | Int16       | UInt16        |
| 32       | Int32       | UInt32        |
| 64       | Int64       | UInt64        |
| Ptr      | NInt        | NUInt         |
| 128      | Int128      | UInt128       |

We then for floating-point have (which would necessitate ConvertToFloat becoming ConvertToSingle):

| Bitwidth | Type   |
|----------|--------|
| 16       | Half   |
| 32       | Single |
| 64       | Double |

a74nh · 2023-10-30T16:52:28Z

> Int32 (signed) vs UInt32 (unsigned) would be the "preferred" terminology here. We have the following in .NET:

Updated to use this.

a74nh · 2023-11-15T13:54:58Z

Some of these methods could cause a floating point exception. Should a conditionalSelect() with a relevant mask cause the exception to not happen?

tannergooding · 2023-11-15T16:54:15Z

.NET doesn't support IEEE 754 floating-point exception handling and it is disabled on startup. Enabling it is undefined behavior.

It is highly unlikely we are to support the feature in the future either, but we would discuss how that works at that time if it were to happen.

I imagine that we would treat it a lot like we do LoadAligned on x86, which is that for perf/efficiency reasons and the most common default, we would allow a slight difference in behavior between T0 (throw) and T1 (non throwing).

a74nh · 2023-11-15T17:01:46Z

> .NET doesn't support IEEE 754 floating-point exception handling and it is disabled on startup. Enabling it is undefined behavior.
>
> It is highly unlikely we are to support the feature in the future either, but we would discuss how that works at that time if it were to happen.
>
> I imagine that we would treat it a lot like we do LoadAligned on x86, which is that for perf/efficiency reasons and the most common default, we would allow a slight difference in behavior between T0 (throw) and T1 (non throwing).

Ok, so it's still ok to hide the mask for FP then. That simplifies things.

bartonjs · 2024-01-23T19:37:23Z

Video

AddRotateComplex parameters (Vector<T> op1, Vector<T> op2, ulong imm_rotation) => (Vector<T> left, Vector<T> right, [ConstantExpected] byte rotation)
AddSequentialAcross should return Vector<T>
AddSequentialAcross (T initial, Vector<T> op) => (Vector<T> initial, Vector<T> value)
MultiplyAddRotateComplex (Vector<T> op1, Vector<T> op2, Vector<T> op3, ulong imm_rotation) => (Vector<T> addend, Vector<T> left, Vector<T> right, [ConstantExpected] byte rotation)
MultiplyAddRotateComplex with two immediates was renamed to MultiplyAddRotateComplexBySelectedScalar for consistency
- And ulong imm_index, ulong imm_rotation => [ConstantExpected] byte rightIndex, [ConstantExpected] byte rotation
TrigonometricMultiplyAddCoefficient (Vector<T> op1, Vector<T> op2, ulong imm3) => (Vector<T> left, Vector<T> right, [ConstantExpected] byte control)
TrigonometricSelectCoefficient (left, right) => (value, selector)
TrigonometricStartingValue (left, right) => (value, sign)
The optional overloads of Scale were cut as the JIT can handle it without these overloads.

namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE  Category: fp
{
  /// T: float, double
  public static unsafe Vector<T> AddRotateComplex(Vector<T> left, Vector<T> right, [ConstantExpected] byte rotation); // FCADD // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> AddSequentialAcross(Vector<T> initial, Vector<T> value); // FADDA // predicated

  /// T: [double, float], [double, int], [double, long], [double, uint], [double, ulong]
  public static unsafe Vector<T> ConvertToDouble(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// T: [int, float], [int, double]
  public static unsafe Vector<T> ConvertToInt32(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// T: [long, float], [long, double]
  public static unsafe Vector<T> ConvertToInt64(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// T: [float, double], [float, int], [float, long], [float, uint], [float, ulong]
  public static unsafe Vector<T> ConvertToSingle(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// T: [uint, float], [uint, double]
  public static unsafe Vector<T> ConvertToUInt32(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// T: [ulong, float], [ulong, double]
  public static unsafe Vector<T> ConvertToUInt64(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> FloatingPointExponentialAccelerator(Vector<T2> value); // FEXPA

  /// T: float, double
  public static unsafe Vector<T> MultiplyAddRotateComplex(Vector<T> addend, Vector<T> left, Vector<T> right, [ConstantExpected] byte rotation); // FCMLA // predicated, MOVPRFX

  public static unsafe Vector<float> MultiplyAddRotateComplexBySelectedScalar(Vector<float> addend, Vector<float> left, Vector<float> right, [ConstantExpected] byte rightIndex, [ConstantExpected] byte rotation); // FCMLA // MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> ReciprocalEstimate(Vector<T> value); // FRECPE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalExponent(Vector<T> value); // FRECPX // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtEstimate(Vector<T> value); // FRSQRTE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtStep(Vector<T> left, Vector<T> right); // FRSQRTS

  /// T: float, double
  public static unsafe Vector<T> ReciprocalStep(Vector<T> left, Vector<T> right); // FRECPS

  /// T: float, double
  public static unsafe Vector<T> RoundAwayFromZero(Vector<T> value); // FRINTA // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToNearest(Vector<T> value); // FRINTN // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToNegativeInfinity(Vector<T> value); // FRINTM // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToPositiveInfinity(Vector<T> value); // FRINTP // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToZero(Vector<T> value); // FRINTZ // predicated, MOVPRFX

  /// T: [float, int], [double, long]
  public static unsafe Vector<T> Scale(Vector<T> left, Vector<T2> right); // FSCALE // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> Sqrt(Vector<T> value); // FSQRT // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> TrigonometricMultiplyAddCoefficient(Vector<T> left, Vector<T> right, [ConstantExpected] byte control); // FTMAD // MOVPRFX

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricSelectCoefficient(Vector<T> value, Vector<T2> selector); // FTSSEL

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricStartingValue(Vector<T> value, Vector<T2> sign); // FTSMUL
}

ghost · 2024-02-08T04:54:22Z

Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE  Category: fp
{

  /// T: float, double
  public static unsafe Vector<T> AddRotateComplex(Vector<T> op1, Vector<T> op2, ulong imm_rotation); // FCADD // predicated, MOVPRFX

  /// T: float, double
  public static unsafe T AddSequentialAcross(T initial, Vector<T> op); // FADDA // predicated

  /// T: [double, float], [double, int], [double, long], [double, uint], [double, ulong]
  public static unsafe Vector<T> ConvertToDouble(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// T: [int, float], [int, double]
  public static unsafe Vector<T> ConvertToInt32(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// T: [long, float], [long, double]
  public static unsafe Vector<T> ConvertToInt64(Vector<T2> value); // FCVTZS // predicated, MOVPRFX

  /// T: [float, double], [float, int], [float, long], [float, uint], [float, ulong]
  public static unsafe Vector<T> ConvertToSingle(Vector<T2> value); // FCVT or SCVTF or UCVTF // predicated, MOVPRFX

  /// T: [uint, float], [uint, double]
  public static unsafe Vector<T> ConvertToUInt32(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// T: [ulong, float], [ulong, double]
  public static unsafe Vector<T> ConvertToUInt64(Vector<T2> value); // FCVTZU // predicated, MOVPRFX

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> FloatingPointExponentialAccelerator(Vector<T2> value); // FEXPA

  /// T: float, double
  public static unsafe Vector<T> MultiplyAddRotateComplex(Vector<T> op1, Vector<T> op2, Vector<T> op3, ulong imm_rotation); // FCMLA // predicated, MOVPRFX

  public static unsafe Vector<float> MultiplyAddRotateComplex(Vector<float> op1, Vector<float> op2, Vector<float> op3, ulong imm_index, ulong imm_rotation); // FCMLA // MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> ReciprocalEstimate(Vector<T> value); // FRECPE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalExponent(Vector<T> value); // FRECPX // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtEstimate(Vector<T> value); // FRSQRTE

  /// T: float, double
  public static unsafe Vector<T> ReciprocalSqrtStep(Vector<T> left, Vector<T> right); // FRSQRTS

  /// T: float, double
  public static unsafe Vector<T> ReciprocalStep(Vector<T> left, Vector<T> right); // FRECPS

  /// T: float, double
  public static unsafe Vector<T> RoundAwayFromZero(Vector<T> value); // FRINTA // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToNearest(Vector<T> value); // FRINTN // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToNegativeInfinity(Vector<T> value); // FRINTM // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToPositiveInfinity(Vector<T> value); // FRINTP // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> RoundToZero(Vector<T> value); // FRINTZ // predicated, MOVPRFX

  /// T: [float, int], [double, long]
  public static unsafe Vector<T> Scale(Vector<T> left, Vector<T2> right); // FSCALE // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> Sqrt(Vector<T> value); // FSQRT // predicated, MOVPRFX

  /// T: float, double
  public static unsafe Vector<T> TrigonometricMultiplyAddCoefficient(Vector<T> op1, Vector<T> op2, ulong imm3); // FTMAD // MOVPRFX

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricSelectCoefficient(Vector<T> left, Vector<T2> right); // FTSSEL

  /// T: [float, uint], [double, ulong]
  public static unsafe Vector<T> TrigonometricStartingValue(Vector<T> left, Vector<T2> right); // FTSMUL

  /// total method signatures: 26


  /// Optional Entries:

  public static unsafe Vector<float> Scale(Vector<float> left, int right); // FSCALE // predicated, MOVPRFX

  public static unsafe Vector<double> Scale(Vector<double> left, long right); // FSCALE // predicated, MOVPRFX

  /// total optional method signatures: 2

}