Skip to content

Commit

Permalink
Convert Mat4 to column vector multiply.
Browse files Browse the repository at this point in the history
  • Loading branch information
bitshifter committed Jun 7, 2019
1 parent e2fb22b commit c978c1a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 269 deletions.
276 changes: 22 additions & 254 deletions src/f32/mat4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,190 +373,6 @@ impl Mat4 {
inverse * rcp_det
}

// #[cfg(all(target_feature = "sse2", not(feature = "scalar-math")))]
// /// Performs a matrix inverse. Note that this method does not check if the matrix is
// /// invertible and will divide by zero if a non-invertible matrix is inverted.
// pub fn inverse(&self) -> Self {
// // sse2 implemenation based off DirectXMath XMMatrixInverse (MIT License)
// #[cfg(target_arch = "x86")]
// use std::arch::x86::*;
// #[cfg(target_arch = "x86_64")]
// use std::arch::x86_64::*;

// macro_rules! permute {
// ($v:expr, $mask:expr) => {
// _mm_shuffle_ps($v, $v, $mask)
// };
// };

// macro_rules! _MM_SHUFFLE {
// ($z:expr, $y:expr, $x:expr, $w:expr) => {
// ($z << 6) | ($y << 4) | ($x << 2) | $w
// };
// };

// macro_rules! neg_mul_sub {
// ($a:expr, $b:expr, $c:expr) => {
// _mm_sub_ps($c, _mm_mul_ps($a, $b))
// };
// };

// macro_rules! mul_add {
// ($a:expr, $b:expr, $c:expr) => {
// _mm_add_ps(_mm_mul_ps($a, $b), $c)
// };
// };

// let mt = self.transpose();
// let mtx = mt.x_axis().into();
// let mty = mt.y_axis().into();
// let mtz = mt.z_axis().into();
// let mtw = mt.w_axis().into();

// unsafe {
// let mut v00 = permute!(mtz, _MM_SHUFFLE!(1, 1, 0, 0));
// let mut v10 = permute!(mtw, _MM_SHUFFLE!(3, 2, 3, 2));
// let mut v01 = permute!(mtx, _MM_SHUFFLE!(1, 1, 0, 0));
// let mut v11 = permute!(mty, _MM_SHUFFLE!(3, 2, 3, 2));
// let mut v02 = _mm_shuffle_ps(mtz, mtx, _MM_SHUFFLE!(2, 0, 2, 0));
// let mut v12 = _mm_shuffle_ps(mtw, mty, _MM_SHUFFLE!(3, 1, 3, 1));

// let mut d0 = _mm_mul_ps(v00, v10);
// let mut d1 = _mm_mul_ps(v01, v11);
// let mut d2 = _mm_mul_ps(v02, v12);

// v00 = permute!(mtz, _MM_SHUFFLE!(3, 2, 3, 2));
// v10 = permute!(mtw, _MM_SHUFFLE!(1, 1, 0, 0));
// v01 = permute!(mtx, _MM_SHUFFLE!(3, 2, 3, 2));
// v11 = permute!(mty, _MM_SHUFFLE!(1, 1, 0, 0));
// v02 = _mm_shuffle_ps(mtz, mtx, _MM_SHUFFLE!(3, 1, 3, 1));
// v12 = _mm_shuffle_ps(mtw, mty, _MM_SHUFFLE!(2, 0, 2, 0));

// // v00 = _mm_mul_ps(v00 * v10);
// // v01 = _mm_mul_ps(v01 * v11);
// // v02 = _mm_mul_ps(v02 * v12);
// // d0 = _mm_sub_ps(d0,v00);
// // d1 = _mm_sub_ps(d1,v01);
// // d2 = _mm_sub_ps(d2,v02);
// d0 = neg_mul_sub!(v00, v10, d0);
// d1 = neg_mul_sub!(v01, v11, d1);
// d2 = neg_mul_sub!(v02, v12, d2);

// // V11 = D0Y,D0W,D2Y,D2Y
// v11 = _mm_shuffle_ps(d0, d2, _MM_SHUFFLE!(1, 1, 3, 1));
// v00 = permute!(mty, _MM_SHUFFLE!(1, 0, 2, 1));
// v10 = _mm_shuffle_ps(v11, d0, _MM_SHUFFLE!(0, 3, 0, 2));
// v01 = permute!(mtx, _MM_SHUFFLE!(0, 1, 0, 2));
// v11 = _mm_shuffle_ps(v11, d0, _MM_SHUFFLE!(2, 1, 2, 1));

// // V13 = D1Y,D1W,D2W,D2W
// let mut v13 = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE!(3, 3, 3, 1));
// v02 = permute!(mtw, _MM_SHUFFLE!(1, 0, 2, 1));
// v12 = _mm_shuffle_ps(v13, d1, _MM_SHUFFLE!(0, 3, 0, 2));
// let mut v03 = permute!(mtz, _MM_SHUFFLE!(0, 1, 0, 2));
// v13 = _mm_shuffle_ps(v13, d1, _MM_SHUFFLE!(2, 1, 2, 1));

// let mut c0 = _mm_mul_ps(v00, v10);
// let mut c2 = _mm_mul_ps(v01, v11);
// let mut c4 = _mm_mul_ps(v02, v12);
// let mut c6 = _mm_mul_ps(v03, v13);

// // V11 = D0X,D0Y,D2X,D2X
// v11 = _mm_shuffle_ps(d0, d2, _MM_SHUFFLE!(0, 0, 1, 0));
// v10 = _mm_shuffle_ps(d0, v11, _MM_SHUFFLE!(2, 1, 0, 3));
// v01 = permute!(mtx, _MM_SHUFFLE!(1, 3, 2, 3));
// v11 = _mm_shuffle_ps(d0, v11, _MM_SHUFFLE!(0, 2, 1, 2));

// // V13 = D1X,D1Y,D2Z,D2Z
// v13 = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE!(2, 2, 1, 0));
// v02 = permute!(mtw, _MM_SHUFFLE!(2, 1, 3, 2));
// v12 = _mm_shuffle_ps(d1, v13, _MM_SHUFFLE!(2, 1, 0, 3));
// v03 = permute!(mtz, _MM_SHUFFLE!(1, 3, 2, 3));
// v13 = _mm_shuffle_ps(d1, v13, _MM_SHUFFLE!(0, 2, 1, 2));

// // V00 = _mm_mul_ps(V00,V10);
// // V01 = _mm_mul_ps(V01,V11);
// // V02 = _mm_mul_ps(V02,V12);
// // V03 = _mm_mul_ps(V03,V13);
// // C0 = _mm_sub_ps(C0,V00);
// // C2 = _mm_sub_ps(C2,V01);
// // C4 = _mm_sub_ps(C4,V02);
// // C6 = _mm_sub_ps(C6,V03);
// c0 = neg_mul_sub!(v00, v10, c0);
// c2 = neg_mul_sub!(v01, v11, c2);
// c4 = neg_mul_sub!(v02, v12, c4);
// c6 = neg_mul_sub!(v03, v13, c6);

// v00 = permute!(mty, _MM_SHUFFLE!(0, 3, 0, 3));
// // V10 = D0Z,D0Z,D2X,D2Y
// v10 = _mm_shuffle_ps(d0, d2, _MM_SHUFFLE!(1, 0, 2, 2));
// v10 = permute!(v10, _MM_SHUFFLE!(0, 2, 3, 0));
// v01 = permute!(mtx, _MM_SHUFFLE!(2, 0, 3, 1));
// // V11 = D0X,D0W,D2X,D2Y
// v11 = _mm_shuffle_ps(d0, d2, _MM_SHUFFLE!(1, 0, 3, 0));
// v11 = permute!(v11, _MM_SHUFFLE!(2, 1, 0, 3));
// v02 = permute!(mtw, _MM_SHUFFLE!(0, 3, 0, 3));
// // V12 = D1Z,D1Z,D2Z,D2W
// v12 = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE!(3, 2, 2, 2));
// v12 = permute!(v12, _MM_SHUFFLE!(0, 2, 3, 0));
// v03 = permute!(mtz, _MM_SHUFFLE!(2, 0, 3, 1));
// // V13 = D1X,D1W,D2Z,D2W
// v13 = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE!(3, 2, 3, 0));
// v13 = permute!(v13, _MM_SHUFFLE!(2, 1, 0, 3));

// // V00 = _mm_mul_ps(V00,V10);
// // V01 = _mm_mul_ps(V01,V11);
// // V02 = _mm_mul_ps(V02,V12);
// // V03 = _mm_mul_ps(V03,V13);
// // XMVECTOR C1 = _mm_sub_ps(C0,V00);
// // C0 = _mm_add_ps(C0,V00);
// // XMVECTOR C3 = _mm_add_ps(C2,V01);
// // C2 = _mm_sub_ps(C2,V01);
// // XMVECTOR C5 = _mm_sub_ps(C4,V02);
// // C4 = _mm_add_ps(C4,V02);
// // XMVECTOR C7 = _mm_add_ps(C6,V03);
// // C6 = _mm_sub_ps(C6,V03);
// let c1 = neg_mul_sub!(v00, v10, c0);
// c0 = mul_add!(v00, v10, c0);
// let c3 = mul_add!(v01, v11, c2);
// c2 = neg_mul_sub!(v01, v11, c2);
// let c5 = neg_mul_sub!(v02, v12, c4);
// c4 = mul_add!(v02, v12, c4);
// let c7 = mul_add!(v03, v13, c6);
// c6 = neg_mul_sub!(v03, v13, c6);

// c0 = _mm_shuffle_ps(c0, c1, _MM_SHUFFLE!(3, 1, 2, 0));
// c2 = _mm_shuffle_ps(c2, c3, _MM_SHUFFLE!(3, 1, 2, 0));
// c4 = _mm_shuffle_ps(c4, c5, _MM_SHUFFLE!(3, 1, 2, 0));
// c6 = _mm_shuffle_ps(c6, c7, _MM_SHUFFLE!(3, 1, 2, 0));
// let c0 = Vec4(permute!(c0, _MM_SHUFFLE!(3, 1, 2, 0)));
// let c2 = Vec4(permute!(c2, _MM_SHUFFLE!(3, 1, 2, 0)));
// let c4 = Vec4(permute!(c4, _MM_SHUFFLE!(3, 1, 2, 0)));
// let c6 = Vec4(permute!(c6, _MM_SHUFFLE!(3, 1, 2, 0)));

// // Get the determinate
// // XMVECTOR vTemp = XMVector4Dot(C0,mtx);
// let det = Vec4::splat(c0.dot(Vec4(mtx)));
// // if (pDeterminant != nullptr)
// // *pDeterminant = vTemp;
// // vTemp = _mm_div_ps(g_XMOne,vTemp);
// let inv_det = Vec4::splat(1.0) / det;

// // XMMATRIX mResult;
// // mResult.r[0] = _mm_mul_ps(C0,vTemp);
// // mResult.r[1] = _mm_mul_ps(C2,vTemp);
// // mResult.r[2] = _mm_mul_ps(C4,vTemp);
// // mResult.r[3] = _mm_mul_ps(C6,vTemp);
// // return mResult;
// Self {
// x_axis: c0 * inv_det,
// y_axis: c2 * inv_det,
// z_axis: c4 * inv_det,
// w_axis: c6 * inv_det,
// }
// }
// }

#[inline]
// TODO: make public at some point
fn look_to_lh(eye: Vec3, dir: Vec3, up: Vec3) -> Self {
Expand Down Expand Up @@ -642,40 +458,24 @@ impl Mat4 {
// }
// }


#[inline]
pub fn mul_vec4(&self, rhs: Vec4) -> Vec4 {
let mut res = self.x_axis * rhs.dup_x();
res = self.y_axis.mul_add(rhs.dup_y(), res);
res = self.z_axis.mul_add(rhs.dup_z(), res);
res = self.w_axis.mul_add(rhs.dup_w(), res);
res
}

#[inline]
/// Multiplies two 4x4 matrices.
/// Multiplication order is as follows:
/// `local_to_world = local_to_object * local_to_world`
pub fn mul_mat4(&self, rhs: &Self) -> Self {
let mut tmp = self.x_axis.dup_x().mul(rhs.x_axis);
tmp = self.x_axis.dup_y().mul_add(rhs.y_axis, tmp);
tmp = self.x_axis.dup_z().mul_add(rhs.z_axis, tmp);
tmp = self.x_axis.dup_w().mul_add(rhs.w_axis, tmp);
let x_axis = tmp;

tmp = self.y_axis.dup_x().mul(rhs.x_axis);
tmp = self.y_axis.dup_y().mul_add(rhs.y_axis, tmp);
tmp = self.y_axis.dup_z().mul_add(rhs.z_axis, tmp);
tmp = self.y_axis.dup_w().mul_add(rhs.w_axis, tmp);
let y_axis = tmp;

tmp = self.z_axis.dup_x().mul(rhs.x_axis);
tmp = self.z_axis.dup_y().mul_add(rhs.y_axis, tmp);
tmp = self.z_axis.dup_z().mul_add(rhs.z_axis, tmp);
tmp = self.z_axis.dup_w().mul_add(rhs.w_axis, tmp);
let z_axis = tmp;

tmp = self.w_axis.dup_x().mul(rhs.x_axis);
tmp = self.w_axis.dup_y().mul_add(rhs.y_axis, tmp);
tmp = self.w_axis.dup_z().mul_add(rhs.z_axis, tmp);
tmp = self.w_axis.dup_w().mul_add(rhs.w_axis, tmp);
let w_axis = tmp;

Self {
x_axis,
y_axis,
z_axis,
w_axis,
x_axis: self.mul_vec4(rhs.x_axis),
y_axis: self.mul_vec4(rhs.y_axis),
z_axis: self.mul_vec4(rhs.z_axis),
w_axis: self.mul_vec4(rhs.w_axis),
}
}

Expand Down Expand Up @@ -709,41 +509,17 @@ impl Mat4 {
w_axis: self.w_axis * s,
}
}
}

// implemented here so they don't need to be duplicated between f32x4 and f32 versions
impl Vec3 {
#[inline]
/// Multiplies a 4x4 matrix and a 3D point.
/// Multiplication order is as follows:
/// `world_position = local_position.transform_mat4(local_to_world)`
pub fn transform_mat4(self, rhs: &Mat4) -> Self {
pub fn transform_point3(&self, rhs: Vec3) -> Vec3 {
// TODO: optimise
self.extend(1.0).transform_mat4(rhs).truncate()
self.mul_vec4(rhs.extend(1.0)).truncate()
}

#[inline]
/// Multiplies a 4x4 matrix and a 3D direction vector. Translation is not applied.
/// Multiplication order is as follows:
/// `world_direction = local_direction.transform_mat4(local_to_world)`
pub fn rotate_mat4(self, rhs: &Mat4) -> Self {
pub fn transform_vector3(&self, rhs: Vec3) -> Vec3 {
// TODO: optimise
self.extend(0.0).transform_mat4(rhs).truncate()
}
}

// implemented here so they don't need to be duplicated between f32x4 and f32 versions
impl Vec4 {
#[inline]
/// Multiplies a 4x4 matrix and a 4D vector.
/// Multiplication order is as follows:
/// `world_position = local_position.transform_mat4(local_to_world)`
pub fn transform_mat4(self, rhs: &Mat4) -> Self {
let mut tmp = self.dup_x().mul(rhs.x_axis());
tmp = self.dup_y().mul_add(rhs.y_axis(), tmp);
tmp = self.dup_z().mul_add(rhs.z_axis(), tmp);
tmp = self.dup_w().mul_add(rhs.w_axis(), tmp);
tmp
self.mul_vec4(rhs.extend(0.0)).truncate()
}
}

Expand Down Expand Up @@ -793,19 +569,11 @@ impl Mul<Mat4> for Mat4 {
}
}

// impl Mul<Mat4> for Vec3 {
// type Output = Vec3;
// #[inline]
// fn mul(self, rhs: Mat4) -> Vec3 {
// self.transform_mat4(&rhs)
// }
// }

impl Mul<Mat4> for Vec4 {
type Output = Self;
impl Mul<Vec4> for Mat4 {
type Output = Vec4;
#[inline]
fn mul(self, rhs: Mat4) -> Self {
self.transform_mat4(&rhs)
fn mul(self, rhs: Vec4) -> Vec4 {
self.mul_vec4(rhs)
}
}

Expand Down
31 changes: 16 additions & 15 deletions tests/mat4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,25 +112,25 @@ fn test_from_rotation() {
#[test]
fn test_mat4_mul() {
let mat_a = Mat4::from_axis_angle(Vec3::unit_z(), deg(90.0));
let result3 = Vec3::unit_y().rotate_mat4(&mat_a);
let result3 = mat_a.transform_vector3(Vec3::unit_y());
assert_ulps_eq!(vec3(-1.0, 0.0, 0.0), result3);
assert_ulps_eq!(result3, (Vec3::unit_y().extend(0.0) * mat_a).truncate());
let result4 = Vec4::unit_y().transform_mat4(&mat_a);
assert_ulps_eq!(result3, (mat_a * Vec3::unit_y().extend(0.0)).truncate());
let result4 = mat_a * Vec4::unit_y();
assert_ulps_eq!(vec4(-1.0, 0.0, 0.0, 0.0), result4);
assert_ulps_eq!(result4, Vec4::unit_y() * mat_a);
assert_ulps_eq!(result4, mat_a * Vec4::unit_y());

let mat_b = Mat4::from_scale_rotation_translation(
Vec3::new(0.5, 1.5, 2.0),
Quat::from_rotation_x(deg(90.0)),
Vec3::new(1.0, 2.0, 3.0),
);
let result3 = Vec3::unit_y().rotate_mat4(&mat_b);
let result3 = mat_b.transform_vector3(Vec3::unit_y());
assert_ulps_eq!(vec3(0.0, 0.0, 1.5), result3, epsilon = 1.0e-6);
assert_ulps_eq!(result3, (Vec3::unit_y().extend(0.0) * mat_b).truncate());
assert_ulps_eq!(result3, (mat_b * Vec3::unit_y().extend(0.0)).truncate());

let result3 = Vec3::unit_y().transform_mat4(&mat_b);
let result3 = mat_b.transform_point3(Vec3::unit_y());
assert_ulps_eq!(vec3(1.0, 2.0, 4.5), result3, epsilon = 1.0e-6);
assert_ulps_eq!(result3, (Vec3::unit_y().extend(1.0) * mat_b).truncate());
assert_ulps_eq!(result3, (mat_b * Vec3::unit_y().extend(1.0)).truncate());
}

#[test]
Expand All @@ -151,11 +151,12 @@ fn test_from_ypr() {
let z1 = Mat4::from_rotation_ypr(zero, zero, roll);
assert_ulps_eq!(z0, z1);

let yx0 = y0 * x0;
// TODO: fix ypr order
let yx0 = x0 * y0;
let yx1 = Mat4::from_rotation_ypr(yaw, pitch, zero);
assert_ulps_eq!(yx0, yx1);

let yxz0 = y0 * x0 * z0;
let yxz0 = z0 * x0 * y0;
let yxz1 = Mat4::from_rotation_ypr(yaw, pitch, roll);
assert_ulps_eq!(yxz0, yxz1);
}
Expand All @@ -164,7 +165,7 @@ fn test_from_ypr() {
fn test_from_scale() {
let m = Mat4::from_scale(Vec3::new(2.0, 4.0, 8.0));
assert_ulps_eq!(
Vec3::new(1.0, 1.0, 1.0).transform_mat4(&m),
m.transform_point3(Vec3::new(1.0, 1.0, 1.0)),
Vec3::new(2.0, 4.0, 8.0)
);
assert_ulps_eq!(Vec4::unit_x() * 2.0, m.x_axis());
Expand Down Expand Up @@ -233,8 +234,8 @@ fn test_mat4_inverse() {
let m_inv = m.inverse();
// assert_ne!(None, m_inv);
// let m_inv = m_inv.unwrap();
assert_ulps_eq!(Mat4::identity(), m * m_inv);
assert_ulps_eq!(Mat4::identity(), m_inv * m);
assert_ulps_eq!(Mat4::identity(), m * m_inv, epsilon = 1.0e-5);
assert_ulps_eq!(Mat4::identity(), m_inv * m, epsilon = 1.0e-5);
assert_ulps_eq!(m_inv, trans_inv * rotz_inv * scale_inv);
}

Expand All @@ -246,8 +247,8 @@ fn test_mat4_look_at() {
let lh = Mat4::look_at_lh(eye, center, up);
let rh = Mat4::look_at_rh(eye, center, up);
let point = Vec3::new(1.0, 0.0, 0.0);
assert_ulps_eq!(point.transform_mat4(&lh), Vec3::new(0.0, 1.0, -5.0));
assert_ulps_eq!(point.transform_mat4(&rh), Vec3::new(0.0, 1.0, 5.0));
assert_ulps_eq!(lh.transform_point3(point), Vec3::new(0.0, 1.0, -5.0));
assert_ulps_eq!(rh.transform_point3(point), Vec3::new(0.0, 1.0, 5.0));
}

#[test]
Expand Down

0 comments on commit c978c1a

Please sign in to comment.