From fa240d488ad98091da7a3cfbfc16665327d043c1 Mon Sep 17 00:00:00 2001
From: Gabor Rapcsanyi <rgabor@webkit.org>
Date: Sun, 10 Apr 2016 07:45:55 +0000
Subject: [PATCH] Merge r166233 - [ARM64] GNU assembler fails in
 TransformationMatrix::multiply https://bugs.webkit.org/show_bug.cgi?id=130454

Reviewed by Zoltan Herczeg.

Change the NEON intstructions to the proper style.

* platform/graphics/transforms/TransformationMatrix.cpp:
(WebCore::TransformationMatrix::multiply):
---
 Source/WebCore/ChangeLog                      | 12 +++
 .../transforms/TransformationMatrix.cpp       | 94 +++++++++----------
 2 files changed, 59 insertions(+), 47 deletions(-)

diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog
index 39c5eb4f9a13..a07c24769997 100644
--- a/Source/WebCore/ChangeLog
+++ b/Source/WebCore/ChangeLog
@@ -1,3 +1,15 @@
+2014-03-25  Gabor Rapcsanyi  <rgabor@webkit.org>
+
+        [ARM64] GNU assembler fails in TransformationMatrix::multiply
+        https://bugs.webkit.org/show_bug.cgi?id=130454
+
+        Reviewed by Zoltan Herczeg.
+
+        Change the NEON intstructions to the proper style.
+
+        * platform/graphics/transforms/TransformationMatrix.cpp:
+        (WebCore::TransformationMatrix::multiply):
+
 2014-06-06  Brady Eidson  <beidson@apple.com>
 
         Initialize a char* that needs to be initialized.
diff --git a/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp b/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp
index dad23599be2c..c5066668d375 100644
--- a/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp
+++ b/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp
@@ -1040,61 +1040,61 @@ TransformationMatrix& TransformationMatrix::multiply(const TransformationMatrix&
     const double* rightMatrix = &(mat.m_matrix[0][0]);
     asm volatile (
         // First, load the leftMatrix completely in memory. The leftMatrix is in v16-v23.
-        "mov      x4, %[leftMatrix]\n\t"
-        "ld1.2d   {v16, v17, v18, v19}, [%[leftMatrix]], #64\n\t"
-        "ld1.2d   {v20, v21, v22, v23}, [%[leftMatrix]]\n\t"
+        "mov   x4, %[leftMatrix]\n\t"
+        "ld1   {v16.2d, v17.2d, v18.2d, v19.2d}, [%[leftMatrix]], #64\n\t"
+        "ld1   {v20.2d, v21.2d, v22.2d, v23.2d}, [%[leftMatrix]]\n\t"
 
         // First row.
-        "ld4r.2d  {v24, v25, v26, v27}, [%[rightMatrix]], #32\n\t"
-        "fmul.2d  v28, v24, v16\n\t"
-        "fmul.2d  v29, v24, v17\n\t"
-        "fmla.2d  v28, v25, v18\n\t"
-        "fmla.2d  v29, v25, v19\n\t"
-        "fmla.2d  v28, v26, v20\n\t"
-        "fmla.2d  v29, v26, v21\n\t"
-        "fmla.2d  v28, v27, v22\n\t"
-        "fmla.2d  v29, v27, v23\n\t"
-
-        "ld4r.2d  {v0, v1, v2, v3}, [%[rightMatrix]], #32\n\t"
-        "st1.2d  {v28, v29}, [x4], #32\n\t"
+        "ld4r  {v24.2d, v25.2d, v26.2d, v27.2d}, [%[rightMatrix]], #32\n\t"
+        "fmul  v28.2d, v24.2d, v16.2d\n\t"
+        "fmul  v29.2d, v24.2d, v17.2d\n\t"
+        "fmla  v28.2d, v25.2d, v18.2d\n\t"
+        "fmla  v29.2d, v25.2d, v19.2d\n\t"
+        "fmla  v28.2d, v26.2d, v20.2d\n\t"
+        "fmla  v29.2d, v26.2d, v21.2d\n\t"
+        "fmla  v28.2d, v27.2d, v22.2d\n\t"
+        "fmla  v29.2d, v27.2d, v23.2d\n\t"
+
+        "ld4r  {v0.2d, v1.2d, v2.2d, v3.2d}, [%[rightMatrix]], #32\n\t"
+        "st1  {v28.2d, v29.2d}, [x4], #32\n\t"
 
         // Second row.
-        "fmul.2d  v30, v0, v16\n\t"
-        "fmul.2d  v31, v0, v17\n\t"
-        "fmla.2d  v30, v1, v18\n\t"
-        "fmla.2d  v31, v1, v19\n\t"
-        "fmla.2d  v30, v2, v20\n\t"
-        "fmla.2d  v31, v2, v21\n\t"
-        "fmla.2d  v30, v3, v22\n\t"
-        "fmla.2d  v31, v3, v23\n\t"
-
-        "ld4r.2d  {v24, v25, v26, v27}, [%[rightMatrix]], #32\n\t"
-        "st1.2d   {v30, v31}, [x4], #32\n\t"
+        "fmul  v30.2d, v0.2d, v16.2d\n\t"
+        "fmul  v31.2d, v0.2d, v17.2d\n\t"
+        "fmla  v30.2d, v1.2d, v18.2d\n\t"
+        "fmla  v31.2d, v1.2d, v19.2d\n\t"
+        "fmla  v30.2d, v2.2d, v20.2d\n\t"
+        "fmla  v31.2d, v2.2d, v21.2d\n\t"
+        "fmla  v30.2d, v3.2d, v22.2d\n\t"
+        "fmla  v31.2d, v3.2d, v23.2d\n\t"
+
+        "ld4r  {v24.2d, v25.2d, v26.2d, v27.2d}, [%[rightMatrix]], #32\n\t"
+        "st1   {v30.2d, v31.2d}, [x4], #32\n\t"
 
         // Third row.
-        "fmul.2d  v28, v24, v16\n\t"
-        "fmul.2d  v29, v24, v17\n\t"
-        "fmla.2d  v28, v25, v18\n\t"
-        "fmla.2d  v29, v25, v19\n\t"
-        "fmla.2d  v28, v26, v20\n\t"
-        "fmla.2d  v29, v26, v21\n\t"
-        "fmla.2d  v28, v27, v22\n\t"
-        "fmla.2d  v29, v27, v23\n\t"
-
-        "ld4r.2d  {v0, v1, v2, v3}, [%[rightMatrix]], #32\n\t"
-        "st1.2d   {v28, v29}, [x4], #32\n\t"
+        "fmul  v28.2d, v24.2d, v16.2d\n\t"
+        "fmul  v29.2d, v24.2d, v17.2d\n\t"
+        "fmla  v28.2d, v25.2d, v18.2d\n\t"
+        "fmla  v29.2d, v25.2d, v19.2d\n\t"
+        "fmla  v28.2d, v26.2d, v20.2d\n\t"
+        "fmla  v29.2d, v26.2d, v21.2d\n\t"
+        "fmla  v28.2d, v27.2d, v22.2d\n\t"
+        "fmla  v29.2d, v27.2d, v23.2d\n\t"
+
+        "ld4r  {v0.2d, v1.2d, v2.2d, v3.2d}, [%[rightMatrix]], #32\n\t"
+        "st1   {v28.2d, v29.2d}, [x4], #32\n\t"
 
         // Fourth row.
-        "fmul.2d  v30, v0, v16\n\t"
-        "fmul.2d  v31, v0, v17\n\t"
-        "fmla.2d  v30, v1, v18\n\t"
-        "fmla.2d  v31, v1, v19\n\t"
-        "fmla.2d  v30, v2, v20\n\t"
-        "fmla.2d  v31, v2, v21\n\t"
-        "fmla.2d  v30, v3, v22\n\t"
-        "fmla.2d  v31, v3, v23\n\t"
-
-        "st1.2d  {v30, v31}, [x4]\n\t"
+        "fmul  v30.2d, v0.2d, v16.2d\n\t"
+        "fmul  v31.2d, v0.2d, v17.2d\n\t"
+        "fmla  v30.2d, v1.2d, v18.2d\n\t"
+        "fmla  v31.2d, v1.2d, v19.2d\n\t"
+        "fmla  v30.2d, v2.2d, v20.2d\n\t"
+        "fmla  v31.2d, v2.2d, v21.2d\n\t"
+        "fmla  v30.2d, v3.2d, v22.2d\n\t"
+        "fmla  v31.2d, v3.2d, v23.2d\n\t"
+
+        "st1  {v30.2d, v31.2d}, [x4]\n\t"
 
         : [leftMatrix]"+r"(leftMatrix), [rightMatrix]"+r"(rightMatrix)
         :